Skip to content

Commit

Permalink
Merge pull request #2 from melissalinkert/more-sharding
Browse files Browse the repository at this point in the history
More sharding work
  • Loading branch information
sbesson authored Aug 26, 2024
2 parents a0ecef2 + 5620d04 commit 4af88fb
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 11 deletions.
49 changes: 41 additions & 8 deletions src/main/java/com/glencoesoftware/zarr/Convert.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ public class Convert implements Callable<Integer> {
private boolean writeV2;

private ShardConfiguration shardConfig;
private int[] requestedShard; // the requested size for custom sharding
private String[] codecs;

/**
Expand Down Expand Up @@ -136,8 +137,12 @@ public void setSharding(String shard) {
shardConfig = Enum.valueOf(ShardConfiguration.class, shard);
}
catch (IllegalArgumentException e) {
// TODO
shardConfig = ShardConfiguration.CUSTOM;
String[] shardSize = shard.split(",");
requestedShard = new int[shardSize.length];
for (int i=0; i<shardSize.length; i++) {
requestedShard[i] = Integer.parseInt(shardSize[i]);
}
}
}
}
Expand Down Expand Up @@ -305,12 +310,20 @@ public void convertToV3() throws Exception {
// no changes needed
break;
case SUPERCHUNK:
// each shard covers 2x2 chunks
// each shard covers 2x2 chunks in XY
chunkSizes[4] *= 2;
chunkSizes[3] *= 2;

// shard across other dimensions too, but only
// if the dimension is greater than the chunk size
for (int i=0; i<=2; i++) {
if (shape[i] > chunkSizes[i]) {
chunkSizes[i] *= 2;
}
}
break;
case CUSTOM:
// TODO
chunkSizes = requestedShard;
break;
}

Expand All @@ -319,6 +332,8 @@ public void convertToV3() throws Exception {
}
else {
LOGGER.warn("Skipping sharding due to incompatible sizes");
LOGGER.debug(" tried chunk={}, shard={}",
Arrays.toString(originalChunkSizes), Arrays.toString(chunkSizes));
chunkSizes = originalChunkSizes;
}
}
Expand Down Expand Up @@ -365,8 +380,29 @@ else if (codecName.equals("blosc")) {
gridPosition[0] = t;
LOGGER.debug("copying chunk of size {} at position {}",
Arrays.toString(originalChunkSizes), Arrays.toString(gridPosition));
Object bytes = tile.read(originalChunkSizes, gridPosition);
outputArray.write(Utils.toLongArray(gridPosition), NetCDF_Util.createArrayWithGivenStorage(bytes, originalChunkSizes));

// adjust the chunk size to handle edges
// otherwise the array writing will throw an exception if the
// array shape is not an exact multiple of the chunk size
int[] thisChunkSize = new int[originalChunkSizes.length];
System.arraycopy(originalChunkSizes, 0, thisChunkSize, 0, thisChunkSize.length);
if (x + tileX > shape[3]) {
thisChunkSize[3] = shape[3] - x;
}
if (y + tileY > shape[4]) {
thisChunkSize[4] = shape[4] - y;
}
if (z + originalChunkSizes[2] > shape[2]) {
thisChunkSize[2] = shape[2] - z;
}
if (c + originalChunkSizes[1] > shape[1]) {
thisChunkSize[1] = shape[1] - c;
}
if (t + originalChunkSizes[0] > shape[0]) {
thisChunkSize[0] = shape[0] - t;
}
Object bytes = tile.read(thisChunkSize, gridPosition);
outputArray.write(Utils.toLongArray(gridPosition), NetCDF_Util.createArrayWithGivenStorage(bytes, thisChunkSize));
}
}
}
Expand Down Expand Up @@ -589,9 +625,6 @@ private DataType getV2Type(dev.zarr.zarrjava.v3.DataType v3) {
*/
private boolean chunkAndShardCompatible(int[] chunkSize, int[] shardSize, int[] shape) {
for (int d=0; d<shape.length; d++) {
if (shape[d] % shardSize[d] != 0) {
return false;
}
if (shardSize[d] % chunkSize[d] != 0) {
return false;
}
Expand Down
128 changes: 125 additions & 3 deletions src/test/java/com/glencoesoftware/zarr/test/ConversionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ public void testCodecs() throws Exception {
}

/**
* Test different sharding options
* Test different default sharding options
*/
@Test
public void testSharding() throws Exception {
Expand Down Expand Up @@ -375,12 +375,134 @@ public void testSharding() throws Exception {
}
}

/**
 * Test the default sharding options on a dataset with multiple Z sections.
 *
 * The input is 4096x4096 with 10 Z sections and is written by bioformats2raw
 * with a Z chunk size of 2. For each of SINGLE, CHUNK and SUPERCHUNK the
 * v2 data is converted to v3, the shard shape recorded for resolution 0 is
 * compared to the expected value, and the result is converted back to v2
 * and compared with the original.
 */
@Test
public void test3DSharding() throws Exception {
  input = fake("sizeX", "4096", "sizeY", "4096", "sizeZ", "10");
  // start with a Z chunk size of 2 (== 5 Z chunks)
  assertBioFormats2Raw("-z", "2");

  String[] shardOptions = new String[] {
    "SINGLE", "CHUNK", "SUPERCHUNK"
  };
  // expected shard shape for each entry of shardOptions, in the same order
  int[][] shardSizes = new int[][] {
    {1, 1, 10, 4096, 4096},
    {1, 1, 2, 1024, 1024},
    {1, 1, 4, 2048, 2048}
  };

  for (int opt=0; opt<shardOptions.length; opt++) {
    // first convert v2 produced by bioformats2raw to v3
    Path v3Output = tmp.newFolder().toPath().resolve("v3-test");
    Convert v3Converter = new Convert();
    v3Converter.setInput(output.toString());
    v3Converter.setOutput(v3Output.toString());

    v3Converter.setSharding(shardOptions[opt]);
    v3Converter.convertToV3();

    // check the shard shape recorded for the first resolution of the v3 output
    Store store = new FilesystemStore(v3Output);
    Array resolution = Array.open(store.resolve("0", "0"));

    // expected value goes first so that a failure message reads correctly
    int[] expectedShardSize = shardSizes[opt];
    Assert.assertArrayEquals(expectedShardSize, resolution.metadata.chunkShape());

    // now convert v3 back to v2
    Path roundtripOutput = tmp.newFolder().toPath().resolve("v2-roundtrip-test");
    Convert v2Converter = new Convert();
    v2Converter.setInput(v3Output.toString());
    v2Converter.setOutput(roundtripOutput.toString());
    v2Converter.setWriteV2(true);
    v2Converter.convertToV2();

    Path originalOMEXML = output.resolve("OME").resolve("METADATA.ome.xml");
    Path roundtripOMEXML = roundtripOutput.resolve("OME").resolve("METADATA.ome.xml");

    // make sure the OME-XML is present and not changed
    Assert.assertEquals(Files.readAllLines(originalOMEXML), Files.readAllLines(roundtripOMEXML));

    // make sure all pixels are identical in resolution levels 0 through 3
    for (int r=0; r<4; r++) {
      ZarrArray original = ZarrGroup.open(output.resolve("0")).openArray(String.valueOf(r));
      ZarrArray roundtrip = ZarrGroup.open(roundtripOutput.resolve("0")).openArray(String.valueOf(r));

      compareZarrArrays(original, roundtrip);
    }
  }
}

/**
 * Test different custom sharding options.
 *
 * Each option is a comma-separated list of five shard dimensions passed to
 * {@code setSharding}. For every option the v2 data is converted to v3, the
 * shard shape recorded for resolution 0 is compared to the requested shape,
 * and the result is converted back to v2 and compared with the original.
 */
@Test
public void testCustomSharding() throws Exception {
  input = fake("sizeX", "4096", "sizeY", "4096", "sizeT", "2", "sizeC", "3");
  assertBioFormats2Raw();

  String[] shardOptions = new String[] {
    "1,1,1,2048,2048",
    "2,1,1,1024,1024",
    "1,3,1,4096,4096"
  };
  // expected shard shape for each entry of shardOptions, in the same order
  int[][] shardSizes = new int[][] {
    {1, 1, 1, 2048, 2048},
    {2, 1, 1, 1024, 1024},
    {1, 3, 1, 4096, 4096}
  };

  for (int opt=0; opt<shardOptions.length; opt++) {
    // first convert v2 produced by bioformats2raw to v3
    Path v3Output = tmp.newFolder().toPath().resolve("v3-test");
    Convert v3Converter = new Convert();
    v3Converter.setInput(output.toString());
    v3Converter.setOutput(v3Output.toString());

    v3Converter.setSharding(shardOptions[opt]);
    v3Converter.convertToV3();

    // check the shard shape recorded for the first resolution of the v3 output
    Store store = new FilesystemStore(v3Output);
    Array resolution = Array.open(store.resolve("0", "0"));

    // expected value goes first so that a failure message reads correctly
    int[] expectedShardSize = shardSizes[opt];
    Assert.assertArrayEquals(expectedShardSize, resolution.metadata.chunkShape());

    // now convert v3 back to v2
    Path roundtripOutput = tmp.newFolder().toPath().resolve("v2-roundtrip-test");
    Convert v2Converter = new Convert();
    v2Converter.setInput(v3Output.toString());
    v2Converter.setOutput(roundtripOutput.toString());
    v2Converter.setWriteV2(true);
    v2Converter.convertToV2();

    Path originalOMEXML = output.resolve("OME").resolve("METADATA.ome.xml");
    Path roundtripOMEXML = roundtripOutput.resolve("OME").resolve("METADATA.ome.xml");

    // make sure the OME-XML is present and not changed
    Assert.assertEquals(Files.readAllLines(originalOMEXML), Files.readAllLines(roundtripOMEXML));

    // make sure all pixels are identical in resolution levels 0 through 3
    for (int r=0; r<4; r++) {
      ZarrArray original = ZarrGroup.open(output.resolve("0")).openArray(String.valueOf(r));
      ZarrArray roundtrip = ZarrGroup.open(roundtripOutput.resolve("0")).openArray(String.valueOf(r));

      compareZarrArrays(original, roundtrip);
    }
  }
}

private void compareZarrArrays(ZarrArray original, ZarrArray roundtrip) throws Exception {
Assert.assertArrayEquals(original.getShape(), roundtrip.getShape());

int[] shape = original.getShape();
byte[] originalImage = new byte[shape[3] * shape[4]];
byte[] roundtripImage = new byte[shape[3] * shape[4]];
int arraySize = 1;
for (int s : shape) {
arraySize *= s;
}
byte[] originalImage = new byte[arraySize];
byte[] roundtripImage = new byte[arraySize];
original.read(originalImage, shape);
roundtrip.read(roundtripImage, shape);

Expand Down

0 comments on commit 4af88fb

Please sign in to comment.