Develop (#66)

- Fixed an issue with new versions of Ray on Ubuntu and NVIDIA that prevented GPU indexing and multiprocessing. - Made small adjustments to peak fitting, as suggested by Will Lenthe. Signed-off-by: David Rowenhorst <david.rowenhorst@nrl.navy.mil>
USNavalResearchLaboratory · Aug 6, 2024 · 54961b2 · 54961b2
1 parent d39e7eb
commit 54961b2
Show file tree

Hide file tree

Showing 5 changed files with 77 additions and 18 deletions.
diff --git a/pyebsdindex/__init__.py b/pyebsdindex/__init__.py
@@ -11,17 +11,10 @@
 
 
 # Try to import pyopencl only once. This only records whether pyopencl is importable; it does not check that a GPU is present.
+
 try:
-    _pyopencl_installed = False
     import pyopencl
-    from pyebsdindex.opencl import openclparam
-    testcl = openclparam.OpenClParam()
-    try:
-        gpu = testcl.get_gpu()
-        if len(gpu) > 0:
-            _pyopencl_installed = True
-    except:
-        raise ImportError('pyopencl could not find GPU')
+    _pyopencl_installed = True
 except ImportError:
     _pyopencl_installed = False
 

diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py
@@ -360,6 +360,7 @@ def index_pats_distributed(
         _node_ip_address=RAYIPADDRESS, #"0.0.0.0",
         runtime_env={"env_vars":
                       {"PYTHONPATH": os.path.dirname(os.path.dirname(__file__)),
+                       "CUDA_VISIBLE_DEVICES": cudagpuvis,
                       }},
         logging_level=logging.WARNING,
     )  # Suppress INFO messages from ray.

diff --git a/pyebsdindex/band_detect.py b/pyebsdindex/band_detect.py
@@ -644,6 +644,8 @@ def band_label_numba(nBands,nPats,nRho,nTheta,rdnConv,rdnPad,lMaxRdn):
         bandData_avemax[q,i] = sumnn / nnN
         # rnn = np.sum(nn * (np.float32(r) + nnr))
         # cnn = np.sum(nn * (np.float32(c) + nnc))
+        #dx = 0.125 * (2.0 * (nn[1,2] - nn[1,0]) + (nn[0,2] - nn[0,0]) + (nn[2,2] - nn[2,0]))
+        #dy = 0.125 * (2.0 * (nn[2,1] - nn[0,1]) + (nn[2,0] - nn[0,0]) + (nn[2,2] - nn[0,2]))
         dx  = 0.5*(nn[1,2] - nn[1,0])
         dy  = 0.5*(nn[2,1] - nn[0,1])
         dxx = nn[1,2] + nn[1,0] - 2 * nn[1,1]
@@ -655,8 +657,17 @@ def band_label_numba(nBands,nPats,nRho,nTheta,rdnConv,rdnPad,lMaxRdn):
         det = 1.0/det
         dc =  (dyy * dx - dxy * dy) * det
         rc = (dxx * dy - dxy * dx) * det
-        dc = max(-1.0, dc) ; rc = max(-1.0, rc)
-        dc = min(1.0, dc) ;  rc = min(1.0, rc)
+        # protect against a bad dxy estimate, assume dxy == 0.0 -- per suggestion of W. Lenthe
+        if (np.abs(dc) > 0.875) or (np.abs(rc) > 0.875):
+          det = (dxx * dyy)
+          det = det if np.fabs(det) > 1e-12 else 1.0e-12
+          det = 1.0 / det
+          dc = (dyy * dx) * det
+          rc = (dxx * dy) * det
+          if (np.abs(dc) > 0.875) or (np.abs(rc) > 0.875):
+            dc = 0.0 ; rc = 0.0
+        # dc = max(-1.0, dc) ; rc = max(-1.0, rc)
+        # dc = min(1.0, dc) ;  rc = min(1.0, rc)
         cnn = c - dc
         rnn = r - rc
         bandData_aveloc[q,i,:] = np.array([rnn,cnn])

diff --git a/pyebsdindex/nlpar.py b/pyebsdindex/nlpar.py
@@ -33,10 +33,27 @@
 from pyebsdindex import _ray_installed
 from pyebsdindex import _pyopencl_installed
 
-
-if _ray_installed and _pyopencl_installed:
+gpuisthere = False
+if _pyopencl_installed:
+  # check for at least one gpu
+  import pyopencl as cl
+  try:
+    plt = cl.get_platforms()
+    if len(plt) > 0:
+      for p in plt:
+        g = p.get_devices(device_type=cl.device_type.GPU)
+        if len(g) > 0:
+          gpuisthere = True
+          g = None
+          break
+    plt = None
+  except:
+    pass
+
+
+if _ray_installed and gpuisthere:
   from pyebsdindex.opencl.nlpar_clray import NLPAR
-elif _pyopencl_installed and not _ray_installed:
+elif gpuisthere and not _ray_installed:
   from pyebsdindex.opencl.nlpar_cl import NLPAR
 else:
   from pyebsdindex.nlpar_cpu import NLPAR

diff --git a/pyebsdindex/opencl/clkernels.cl b/pyebsdindex/opencl/clkernels.cl
@@ -654,11 +654,15 @@ __kernel void maxlabel( __global const uchar *maxlocin,__global const float *max
       imVal2 = maxvalin[indxy]; 
       avetempweight += imVal2;
       dxy += imVal2; 
+
+      //dx -= imVal2; 
+      //dy -= imVal2;
 
       indxy = ((y-1)*imszx + (x))*nImChunk+z; 
       imVal2 = maxvalin[indxy]; 
       avetempweight += imVal2;
       dy  -= imVal2;
+      //dy -= 2.0 * imVal2;
       dyy += imVal2;
       imValym1 = imVal2; 
 
@@ -668,10 +672,14 @@ __kernel void maxlabel( __global const uchar *maxlocin,__global const float *max
       avetempweight += imVal2;
       dxy -= imVal2;
 
+      //dx += imVal2;
+      //dy -= imVal2;
+
       indxy = ((y)*imszx + (x-1))*nImChunk+z; 
       imVal2 = maxvalin[indxy]; 
       avetempweight += imVal2;
       dx -= imVal2; 
+      //dx -= 2.0 * imVal2; 
       dxx += imVal2; 
 
       indxy = ((y)*imszx + (x))*nImChunk+z; 
@@ -685,17 +693,24 @@ __kernel void maxlabel( __global const uchar *maxlocin,__global const float *max
       imVal2 = maxvalin[indxy]; 
       avetempweight += imVal2;
       dx  +=  imVal2;
+      //dx += 2.0 * imVal2;
       dxx +=  imVal2;
 
+
+
       indxy = ((y+1)*imszx + (x-1))*nImChunk+z; 
       imVal2 = maxvalin[indxy]; 
       avetempweight += imVal2;
       dxy -=  imVal2;
+
+      //dx -= imVal2;
+      //dy += imVal2;
 
       indxy = ((y+1)*imszx + (x))*nImChunk+z; 
       imVal2 = maxvalin[indxy]; 
       avetempweight += imVal2;
       dy  +=  imVal2;
+      //dy += 2.0 * imVal2;
       dyy += imVal2; 
       imValyp1 = imVal2;
 
@@ -704,17 +719,39 @@ __kernel void maxlabel( __global const uchar *maxlocin,__global const float *max
       avetempweight += imVal2;
       dxy  +=  imVal2;
 
+      //dx += imVal2;
+      //dy += imVal2; 
+
       dxy *= 0.25;
-      dx  *= 0.5;
-      dy  *= 0.5;
+      dx  *= 0.5; //0.125; //
+      dy  *= 0.5; //0.125; //
 
       det = (dxx*dyy - dxy*dxy);
       det = (fabs(det) > 1.0e-12) ? det : 1.0e-12; 
       det = 1.0 / det; 
 
-      ix = (float) x - (dyy * dx - dxy * dy) * det; 
-      iy = (float) y - (dxx * dy - dxy * dx) * det; 
+      //ix = (float) x - (dyy * dx - dxy * dy) * det; 
+      //iy = (float) y - (dxx * dy - dxy * dx) * det; 
+
+      ix = (dyy * dx - dxy * dy) * det; 
+      iy = (dxx * dy - dxy * dx) * det;
+      // protect against bad estimates in dxy, assume == 0.0 -- per suggestion of W. Lenthe
+      if ( (fabs(ix) > 0.875) || (fabs(iy) > 0.875) ){
+        det = (dxx*dyy);
+        det = (fabs(det) > 1.0e-12) ? det : 1.0e-12; 
+        det = 1.0 / det; 
+        ix = (dyy * dx) * det; 
+        iy = (dxx * dy) * det;
+        if ( (fabs(ix) > 0.875) || (fabs(iy) > 0.875) ){
+          ix = 0.0; 
+          iy = 0.0;
+        }
 
+      }
+
+      ix = (float) x - ix;
+      iy = (float) y - iy;
+
       aveloc[z*lnmax + i] = (float2) (iy, ix); 
       aveval[z*lnmax + i] = avetempweight/9.0;
       // band width metric