diff --git a/Deeploy/Targets/Neureka/Engine.py b/Deeploy/Targets/Neureka/Engine.py
index 2585b1a688..01d5156e66 100644
--- a/Deeploy/Targets/Neureka/Engine.py
+++ b/Deeploy/Targets/Neureka/Engine.py
@@ -31,11 +31,12 @@
         ConvLayer([NeurekaPWConv2DMapper, NeurekaDWConv2DMapper, NeurekaDenseConv2DMapper]),
 }
 
-_includeList = ["pulp_nnx_neureka.h", "pulp_nnx_util.h", "neureka_siracusa_bsp.h", "neureka.h", "neureka_task.h"]
+_includeList = ["pulp_nnx_neureka.h", "pulp_nnx_util.h", "neureka_siracusa_bsp.h", "neureka.h", "neureka_task.h", "neureka_gvsoc.h"]
 
 _neurekaInitCode = r"""
 neureka_siracusa_conf_t conf = {.max_stall = 8};
 neureka_nnx_init(neureka_siracusa_get_dev(), &conf);
+neureka_gvsoc_log_activate(neureka_siracusa_get_dev(), NEUREKA_GVSOC_LOG_LEVEL_ALL, NEUREKA_GVSOC_LOG_FORMAT_DECIMAL);
 """
 
 
diff --git a/Deeploy/Targets/Neureka/Parsers.py b/Deeploy/Targets/Neureka/Parsers.py
index 3c564c10b2..767d26c5af 100644
--- a/Deeploy/Targets/Neureka/Parsers.py
+++ b/Deeploy/Targets/Neureka/Parsers.py
@@ -50,14 +50,18 @@ def parseNodeCtxt(self,
         # and enforcing that the channels_first is false
         data_in = newCtxt.lookup(self.operatorRepresentation['data_in'])
         data_out = newCtxt.lookup(self.operatorRepresentation['data_out'])
-        weight = newCtxt.lookup(self.operatorRepresentation['weight'])
+        # MARCHIOA: the weight rank depends on the type of convolution, so it
+        # is validated by the child parsers instead of here:
+        #   - PW    -> 3-dim
+        #   - DW    -> 4-dim
+        #   - Dense -> 4-dim
 
         if not all([
                 channels_first == False,
                 len(data_in.shape) == 4,
-                # LMACAN: weight shape should be equal to 3 because we have to do the neureka's
-                # special weight encoding
-                len(weight.shape) == 3,
+                # NOTE: the former `len(weight.shape) == 3` check was removed
+                # from this shared parser; each child parser's parseNodeCtxt
+                # now enforces the weight rank it expects.
         ]):
             return newCtxt, False
 
@@ -95,6 +99,18 @@ def parseNode(self, node: gs.Node) -> bool:
 
         return True
 
+    def parseNodeCtxt(self, ctxt, node, channels_first = True):
+        """Accept the node only if the parent parser does and the weight is 4-dimensional."""
+        newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)
+        if not ret:
+            return ctxt, False
+
+        weight = newCtxt.lookup(self.operatorRepresentation['weight'])
+        if len(weight.shape) != 4:
+            return ctxt, False
+
+        return newCtxt, True
+
 
 class NeurekaRQSDWConv2DParser(NeurekaDWConv2DParser, RQSParserInterface):
 
@@ -136,6 +152,18 @@ def parseNode(self, node: gs.Node) -> bool:
 
         return True
 
+    def parseNodeCtxt(self, ctxt, node, channels_first = True):
+        """Accept the node only if the parent parser does and the weight is 3-dimensional."""
+        newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)
+        if not ret:
+            return ctxt, False
+
+        weight = newCtxt.lookup(self.operatorRepresentation['weight'])
+        if len(weight.shape) != 3:
+            return ctxt, False
+
+        return newCtxt, True
+
 
 class NeurekaRQSPWConv2DParser(NeurekaPWConv2DParser, RQSParserInterface):
 
@@ -176,6 +204,18 @@ def parseNode(self, node: gs.Node) -> bool:
 
         return True
 
+    def parseNodeCtxt(self, ctxt, node, channels_first = True):
+        """Accept the node only if the parent parser does and the weight is 4-dimensional."""
+        newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)
+        if not ret:
+            return ctxt, False
+
+        weight = newCtxt.lookup(self.operatorRepresentation['weight'])
+        if len(weight.shape) != 4:
+            return ctxt, False
+
+        return newCtxt, True
+
 
 class NeurekaRQSDenseConv2DParser(NeurekaDenseConv2DParser, RQSParserInterface):
 
diff --git a/Deeploy/Targets/Neureka/Templates/ConvTemplate.py b/Deeploy/Targets/Neureka/Templates/ConvTemplate.py
index 97253d6e12..04968cf905 100644
--- a/Deeploy/Targets/Neureka/Templates/ConvTemplate.py
+++ b/Deeploy/Targets/Neureka/Templates/ConvTemplate.py
@@ -256,12 +256,12 @@ def getCounters(
             operatorRepresentation: OperatorRepresentation) -> Tuple[int, int, int, int, int, int, int, int, int, int]:
         _ = operatorRepresentation  # operatorRepresentation not accessed for now because it's just for pointwise kernels
 
-        n_channel_out_subtiles = _getNumTiles(channel_out, 28)
+        n_channel_out_subtiles = _getNumTiles(channel_out, 32)
         n_channel_in_subtiles = _getNumTiles(channel_in, 28)
         n_height_out_subtiles = _getNumTiles(height_out, 6)
         n_width_out_subtiles = _getNumTiles(width_out, 6)
 
-        channel_out_border = _getBorderTileSize(channel_out, 28)
+        channel_out_border = _getBorderTileSize(channel_out, 32)
         channel_in_border = _getBorderTileSize(channel_in, 28)
         height_out_border = _getBorderTileSize(height_out, 6)
         width_out_border = _getBorderTileSize(width_out, 6)
diff --git a/Deeploy/Targets/Neureka/TileConstraints/NeurekaDenseConstraint.py b/Deeploy/Targets/Neureka/TileConstraints/NeurekaDenseConstraint.py
index 814024a877..e4c20e65e5 100644
--- a/Deeploy/Targets/Neureka/TileConstraints/NeurekaDenseConstraint.py
+++ b/Deeploy/Targets/Neureka/TileConstraints/NeurekaDenseConstraint.py
@@ -218,7 +218,10 @@ def serializeTilingSolution(
             replacementTypes['weight_addr_offset'] = PointerClass(uint32_t)
             for absoluteCube in absoluteOutputCubes:
                 COffset, CSize = absoluteCube.absoluteOffset[-1], absoluteCube.rectangle.dims[-1]
-                WeightCube = HyperRectangle((COffset, 0, 0), (CSize, weightShape[-2], weightShape[-1]))
+                # Dense weights are 4-dim: tile along the first dim, keep the three trailing dims whole.
+                WeightCube = HyperRectangle(
+                    (COffset, 0, 0, 0),
+                    (CSize, weightShape[-3], weightShape[-2], weightShape[-1]))
                 replacements['weight_addr_offset'].append(calculateFlatOffsetInBytes(WeightCube, weightBuffer))
         else:
             inputWeightBaseOffsets, outputWeightBaseOffsets = cls.extractBaseAddr(tilingSolution, targetMemLevel,
diff --git a/Deeploy/Targets/Neureka/TileConstraints/NeurekaDepthwiseConstraint.py b/Deeploy/Targets/Neureka/TileConstraints/NeurekaDepthwiseConstraint.py
index fd5d791119..1fe96934e5 100644
--- a/Deeploy/Targets/Neureka/TileConstraints/NeurekaDepthwiseConstraint.py
+++ b/Deeploy/Targets/Neureka/TileConstraints/NeurekaDepthwiseConstraint.py
@@ -214,7 +214,10 @@ def serializeTilingSolution(
             replacementTypes['weight_addr_offset'] = PointerClass(uint32_t)
             for absoluteCube in absoluteOutputCubes:
                 COffset, CSize = absoluteCube.absoluteOffset[-1], absoluteCube.rectangle.dims[-1]
-                WeightCube = HyperRectangle((COffset, 0, 0), (CSize, weightShape[-2], weightShape[-1]))
+                # Depthwise weights are 4-dim: tile along the first dim, keep the three trailing dims whole.
+                WeightCube = HyperRectangle(
+                    (COffset, 0, 0, 0),
+                    (CSize, weightShape[-3], weightShape[-2], weightShape[-1]))
                 replacements['weight_addr_offset'].append(calculateFlatOffsetInBytes(WeightCube, weightBuffer))
         else:
             inputWeightBaseOffsets, outputWeightBaseOffsets = cls.extractBaseAddr(tilingSolution, targetMemLevel,
diff --git a/DeeployTest/Tests/Kernels/Integer/Conv/Regular_3x3_RQ/inputs.npz b/DeeployTest/Tests/Kernels/Integer/Conv/Regular_3x3_RQ/inputs.npz
new file mode 100644
index 0000000000..e07a5f5b45
Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/Conv/Regular_3x3_RQ/inputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/Integer/Conv/Regular_3x3_RQ/network.onnx b/DeeployTest/Tests/Kernels/Integer/Conv/Regular_3x3_RQ/network.onnx
new file mode 100644
index 0000000000..34abc066f6
Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/Conv/Regular_3x3_RQ/network.onnx differ
diff --git a/DeeployTest/Tests/Kernels/Integer/Conv/Regular_3x3_RQ/outputs.npz b/DeeployTest/Tests/Kernels/Integer/Conv/Regular_3x3_RQ/outputs.npz
new file mode 100644
index 0000000000..5bf0a5c9ee
Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/Conv/Regular_3x3_RQ/outputs.npz differ
diff --git a/DeeployTest/testUtils/deeployRunner.py b/DeeployTest/testUtils/deeployRunner.py
index 71b056e9df..08b64855df 100644
--- a/DeeployTest/testUtils/deeployRunner.py
+++ b/DeeployTest/testUtils/deeployRunner.py
@@ -238,6 +238,10 @@ def create_config_from_args(args: argparse.Namespace,
             gen_args_list.append(f"--searchStrategy={args.searchStrategy}")
         if hasattr(args, 'plotMemAlloc') and args.plotMemAlloc:
             gen_args_list.append("--plotMemAlloc")
+        if getattr(args, 'enable_3x3', False):
+            gen_args_list.append("--enable-3x3")
+        if getattr(args, 'neureka_wmem', False):
+            gen_args_list.append("--neureka-wmem")
 
     if not tiling and getattr(args, 'profileUntiled', False):
         gen_args_list.append("--profileUntiled")