Bug repair (#60)
* Better memory management on smaller GPUs
* Correct an issue where non-NumPy array patterns (such as Dask arrays) are sent to be indexed.
Signed-off-by: David Rowenhorst <david.rowenhorst@nrl.navy.mil>
drowenhorst-nrl authored May 30, 2024
1 parent 8c09e6b commit ee544d9
Showing 5 changed files with 23 additions and 7 deletions.
2 changes: 2 additions & 0 deletions pyebsdindex/opencl/band_detect_cl.py
@@ -225,6 +225,8 @@ def find_bands(self, patternsIn, verbose=0, clparams=None, chunksize=528, useCPU
def radon_fasterCL(self,image,padding = np.array([0,0]), fixArtifacts = False, background = None, returnBuff = True, clparams=None ):
# this function executes the Radon summations on the GPU
tic = timer()
+ image = np.asarray(image)

# make sure we have an OpenCL environment
if clparams is not None:
if clparams.queue is None:
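The np.asarray(image) guard above is the fix for the second bullet of the commit message: np.asarray is a no-op for a NumPy input, but it materializes array-likes such as Dask arrays through their __array__ protocol before the data is staged for the GPU. A minimal sketch of the failure mode it prevents (assumes dask is installed; not part of this commit):

import numpy as np
import dask.array as da

# A lazy, chunked stand-in for patterns loaded out of core.
patterns = da.ones((10, 60, 60), chunks=(5, 60, 60), dtype=np.float32)

# A Dask array has no contiguous host buffer, so downstream code that
# slices and reshapes for a GPU transfer can fail or mis-index.
image = np.asarray(patterns)   # computes the graph, returns a plain ndarray
assert isinstance(image, np.ndarray)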
10 changes: 8 additions & 2 deletions pyebsdindex/opencl/nlpar_cl.py
@@ -264,6 +264,7 @@ def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=Fa


sigmachunk_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=sigmachunk.nbytes)

cl.enqueue_barrier(queue)
prg.calcsigma(queue, (np.uint32(ncolchunk), np.uint32(nrowchunk)), None,
datapad_gpu, mask_gpu,sigmachunk_gpu,
@@ -404,7 +405,7 @@ def calcnlpar_cl(self, searchradius=None, lam = None, dthresh = None, saturation
clvectlen = 16



# print("target mem:", target_mem)
chunks = self._calcchunks( [pwidth, pheight], ncols, nrows, target_bytes=target_mem,
col_overlap=sr, row_overlap=sr)
#print(chunks[2], chunks[3])
@@ -426,10 +427,14 @@ def calcnlpar_cl(self, searchradius=None, lam = None, dthresh = None, saturation
nchunks = chunksize.size
#return chunks, chunksize
mxchunk = int(chunksize.max())
# print("max chunk:" , mxchunk)

npadmx = clvectlen * int(np.ceil(float(mxchunk)*npat_point/ clvectlen))

datapad_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=int(npadmx) * int(4))
datapadout_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=int(npadmx) * int(4))
# print("data pad", datapad_gpu.size)
# print("data out", datapadout_gpu.size)

nnn = int((2 * sr + 1) ** 2)

@@ -469,14 +474,15 @@ def calcnlpar_cl(self, searchradius=None, lam = None, dthresh = None, saturation

sigmachunk = np.ascontiguousarray(sigma[rstart:rend, cstart:cend].astype(np.float32))
sigmachunk_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=sigmachunk)
# print("sigma", sigmachunk_gpu.size)
szdata = data.size
npad = clvectlen * int(np.ceil(szdata / clvectlen))

#datapad = np.zeros((npad), dtype=np.float32) + np.float32(mxval + 10)
#datapad[0:szdata] = data.reshape(-1)

data_gpu = cl.Buffer(ctx,mf.READ_ONLY | mf.COPY_HOST_PTR,hostbuf=data)

# print("data", data_gpu.size)
if data.dtype.type is np.float32:
prg.nlloadpat32flt(queue, (np.uint64(data.size),1), None, data_gpu, datapad_gpu, wait_for=[filldatain])
if data.dtype.type is np.ubyte:
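For readers following the buffer sizing in calcnlpar_cl above: npadmx rounds the largest chunk's float count up to a whole multiple of clvectlen, so the kernels can read and write 16-wide vectors without a ragged tail. A worked example with assumed pattern dimensions (illustrative numbers only):

import numpy as np

clvectlen = 16           # kernel vector width, as in the code above
mxchunk = 528            # patterns in the largest chunk (assumed)
npat_point = 60 * 60     # pixels per pattern (assumed)

# Round up to a whole number of vectors; each float32 is 4 bytes.
npadmx = clvectlen * int(np.ceil(float(mxchunk) * npat_point / clvectlen))
print(npadmx % clvectlen == 0)   # True
print(npadmx * 4)                # buffer size in bytes: 7603200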
13 changes: 9 additions & 4 deletions pyebsdindex/opencl/nlpar_clray.py
@@ -119,9 +119,10 @@ def calcsigma_clray(self, nn=1, saturation_protect=True, automask=True, normaliz
normalize_d=normalize_d,
gpu_id=gpu_id, **kwargs)

- target_mem = clparams.gpu[gpu_id].max_mem_alloc_size // 3
- max_mem = clparams.gpu[gpu_id].global_mem_size * 0.75
+ target_mem = clparams.gpu[gpu_id].max_mem_alloc_size // 2
+ max_mem = clparams.gpu[gpu_id].global_mem_size * 0.5
if target_mem * ngpuwrker > max_mem:
#print('revisemem:')
target_mem = max_mem / ngpuwrker

patternfile = self.getinfileobj()
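These are the "better memory management on smaller GPUs" numbers: each GPU worker now budgets half (was a third) of the device's largest single allocation, and all workers together are capped at half (was three quarters) of global memory, 0.4 in calcnlpar_clray below. A worked sketch under assumed device limits (not real device queries):

# Assumed limits: a 4 GiB GPU that allows 2 GiB per allocation,
# shared by four GPU workers.
ngpuwrker = 4
max_mem_alloc_size = 2 * 2**30   # stands in for CL_DEVICE_MAX_MEM_ALLOC_SIZE
global_mem_size = 4 * 2**30      # stands in for CL_DEVICE_GLOBAL_MEM_SIZE

target_mem = max_mem_alloc_size // 2    # per-worker budget: 1 GiB
max_mem = global_mem_size * 0.5         # combined cap: 2 GiB
if target_mem * ngpuwrker > max_mem:    # 4 GiB requested > 2 GiB cap
    target_mem = max_mem / ngpuwrker    # shrink to 512 MiB per worker
print(target_mem / 2**20, 'MiB per worker')   # 512.0 MiB per worker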
@@ -479,7 +480,7 @@ def calcnlpar_clray(self, searchradius=None, lam = None, dthresh = None, saturat
gpu_id= gpu_id)

target_mem = clparams.gpu[gpu_id].max_mem_alloc_size//3
- max_mem = clparams.gpu[gpu_id].global_mem_size*0.75
+ max_mem = clparams.gpu[gpu_id].global_mem_size*0.4
if target_mem*ngpuwrker > max_mem:
target_mem = max_mem/ngpuwrker
#print(target_mem/1.0e9)
@@ -545,7 +546,7 @@ def calcnlpar_clray(self, searchradius=None, lam = None, dthresh = None, saturat
if len(jobqueue) > 0:
if len(idlewrker) > 0:
wrker = idlewrker.pop()
- job = jobqueue.pop()
+ job = jobqueue.pop(0)

tasks.append(wrker.runnlpar_chunk.remote(job, nlparobj=nlpar_remote))
busywrker.append(wrker)
@@ -561,6 +562,10 @@ def calcnlpar_clray(self, searchradius=None, lam = None, dthresh = None, saturat
ndone += 1
if verbose >= 2:
print("tiles complete: ", ndone, "/", njobs, sep='', end='\r')
+ else:  # an error has occurred; hopefully the chunk just needs a re-process
+ jobqueue.append(job)
+ print(message)

if verbose >= 2:
print('\n', end='')
return str(self.patternfileout.filepath)
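Two fixes meet in the scheduling loop above: jobqueue.pop(0) hands out chunks first-in-first-out (a bare pop() would take the newest tile first), and a chunk whose worker reports an error is appended back onto the queue for another attempt. A simplified, Ray-free sketch of that pattern; every name here is illustrative rather than the module's API:

def run_jobs(jobqueue, workers, run_chunk):
    '''Dispatch chunks to workers FIFO, re-queueing failed chunks.'''
    idle = list(workers)
    njobs, ndone = len(jobqueue), 0
    while ndone < njobs:
        wrker = idle.pop()
        job = jobqueue.pop(0)       # FIFO keeps tiles in scan order
        ok, message = run_chunk(wrker, job)
        idle.append(wrker)
        if ok:
            ndone += 1
        else:                       # error: hopefully transient, so retry
            jobqueue.append(job)
            print(message)

In the real method the chunks run asynchronously through Ray remote calls, so the idle/busy worker bookkeeping is what keeps every GPU fed while results trickle back.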
2 changes: 1 addition & 1 deletion pyebsdindex/opencl/openclparam.py
@@ -25,7 +25,7 @@
from os import path
import pyopencl as cl
from os import environ
- environ['PYOPENCL_COMPILER_OUTPUT'] = '1'
+ environ['PYOPENCL_COMPILER_OUTPUT'] = '0'

RADDEG = 180.0/np.pi
DEGRAD = np.pi/180.0
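Setting PYOPENCL_COMPILER_OUTPUT to '0' silences the device compiler log that PyOpenCL otherwise echoes as warnings on every kernel build; the old value of '1' was a debugging aid. A sketch of flipping it back on in user code (assumes a working OpenCL platform):

from os import environ
environ['PYOPENCL_COMPILER_OUTPUT'] = '1'   # surface compiler warnings again

import pyopencl as cl

ctx = cl.create_some_context()
prg = cl.Program(ctx, '__kernel void noop(__global float *x) {}').build()
# With '1', .build() emits the compiler log even on success; with '0'
# it stays quiet unless the build actually fails.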
3 changes: 3 additions & 0 deletions pyebsdindex/radon_fast.py
@@ -235,6 +235,7 @@ def radon_fast(self, imageIn, padding = np.array([0,0]), fixArtifacts = False,

def radon_faster(self,imageIn,padding = np.array([0,0]), fixArtifacts = False, background = None, normalization=True):
tic = timer()

shapeIm = np.shape(imageIn)
if imageIn.ndim == 2:
nIm = 1
@@ -244,11 +245,13 @@ def radon_faster(self,imageIn,padding = np.array([0,0]), fixArtifacts = False, b
nIm = shapeIm[0]
# reform = False


if background is None:
image = (imageIn.reshape(-1)).astype(np.float32)
else:
image = imageIn - background
image = (image.reshape(-1)).astype(np.float32)
+ image = np.asarray(image)

nPx = shapeIm[-1]*shapeIm[-2]
indxDim = np.asarray(self.indexPlan.shape)
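This is the same Dask guard as in band_detect_cl.py, but it must sit after the background subtraction: subtracting a NumPy background from a lazy array yields another lazy array, so coercing imageIn alone would not be enough. A short sketch (assumes dask; shapes are illustrative):

import numpy as np
import dask.array as da

imageIn = da.ones((4, 60, 60), chunks=(2, 60, 60), dtype=np.float32)
background = np.zeros((60, 60), dtype=np.float32)

image = imageIn - background   # dask in, dask out: laziness propagates
image = np.asarray(image)      # materialize before flattening and indexing
image = image.reshape(-1).astype(np.float32)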
