diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt index 8e4c091cd01dd3a7ee72957e3e6e3a7661ac8b19..7958b8be2205ee73124e1f2e564471abddc5b861 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt @@ -8,6 +8,7 @@ foreach(TEST_INFERENCE_IR_PASS ${TEST_TRT_IR_PASSES}) endforeach() if(WITH_GPU AND TENSORRT_FOUND) + list(REMOVE_ITEM TEST_TRT_IR_PASSES test_trt_multiclass_nms_op) foreach(target ${TEST_TRT_IR_PASSES}) py_test_modules(${target} MODULES ${target}) endforeach() @@ -32,6 +33,13 @@ if(WITH_GPU AND TENSORRT_FOUND) set_tests_properties(test_trt_subgraph_pass PROPERTIES TIMEOUT 120) set_tests_properties(test_trt_activation_pass PROPERTIES TIMEOUT 120) set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120) -set_tests_properties(test_trt_multiclass_nms_op PROPERTIES TIMEOUT 200) +#set_tests_properties(test_trt_multiclass_nms_op PROPERTIES TIMEOUT 200) set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120) +set_tests_properties(test_trt_pool_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 45) +set_tests_properties(test_trt_reduce_mean_op PROPERTIES TIMEOUT 60) +set_tests_properties(test_trt_tile_op PROPERTIES TIMEOUT 60) +set_tests_properties(test_trt_convert_conv2d PROPERTIES TIMEOUT 100) +set_tests_properties(test_trt_fc_fuse_quant_dequant_pass PROPERTIES TIMEOUT 100) +set_tests_properties(test_trt_conv_quant_dequant_pass PROPERTIES TIMEOUT 100) +set_tests_properties(test_trt_matmul_quant_dequant PROPERTIES TIMEOUT 100) endif() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_quant_dequant_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_quant_dequant_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..eacdb2696896166bebc46f63cd7ab837c7b70fd2 --- /dev/null +++ 
b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_quant_dequant_pass.py @@ -0,0 +1,308 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import shutil +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +from quant_dequant_test import QuantDequantTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker +from paddle.fluid.core import AnalysisConfig + + +class QuantDequantTensorRTSubgraphPassConvTest(QuantDequantTest): + def setUp(self): + self.set_params() + + def network(): + self.data = fluid.data( + name='data', shape=[1, 28, 28], dtype='float32') + data_reshape = fluid.layers.reshape(self.data, shape=[1, 4, 14, 14]) + self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') + label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1]) + conv_out = fluid.layers.conv2d( + input=data_reshape, + num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + groups=self.conv_groups, + padding=self.conv_padding, + bias_attr=False, + use_cudnn=self.use_cudnn, + act=None) + if self.conv_padding == [1, 1]: + cout = fluid.layers.reshape(conv_out, shape=[1, 1, 10816]) + elif self.conv_padding == 'VALID': + cout = fluid.layers.reshape(conv_out, shape=[1, 1, 7744]) + elif self.conv_padding == 'SAME': + cout = fluid.layers.reshape(conv_out, shape=[1, 1, 12544]) + 
elif self.conv_groups == 4: + cout = fluid.layers.reshape(conv_out, shape=[1, 1, 10816]) + result = fluid.layers.relu(cout) + loss = fluid.layers.cross_entropy(input=result, label=label_shape) + avg_loss = fluid.layers.mean(loss) + return avg_loss, result + + self.main_program.random_seed = 2 + self.startup_program.random_seed = 2 + self.test_main_program.random_seed = 2 + #self.test_startup_program.random_seed = 2 + with fluid.unique_name.guard(): + with fluid.program_guard(self.main_program, self.startup_program): + self.loss, result = network() + opt = fluid.optimizer.Adam(learning_rate=0.0001) + opt.minimize(self.loss) + with fluid.unique_name.guard(): + with fluid.program_guard(self.test_main_program, + self.startup_program): + network() + self.feeds = {"data": np.random.random([1, 28, 28]).astype("float32")} + self.fetch_list = [result] + self.enable_trt = True + self.trt_parameters = QuantDequantTensorRTSubgraphPassConvTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False) + self.activation_quantize_type = 'moving_average_abs_max' + self.weight_quantize_type = 'channel_wise_abs_max' + + def set_params(self): + self.conv_num_filters = 64 + self.conv_filter_size = 4 + self.conv_groups = 1 + self.conv_padding = [1, 1] + self.use_cudnn = True + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option( + use_gpu, atol=1e-1, flatten=False, rtol=1e-1) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class QuantDequantTensorRTSubgraphPassConvValidPaddingTest( + QuantDequantTensorRTSubgraphPassConvTest): + def set_params(self): + self.conv_num_filters = 64 + self.conv_filter_size = 4 + self.conv_groups = 1 + self.conv_padding = 'VALID' + self.use_cudnn = True + + +class QuantDequantTensorRTSubgraphPassConvSamePaddingTest( + QuantDequantTensorRTSubgraphPassConvTest): + def set_params(self): + self.conv_num_filters = 64 + self.conv_filter_size = 4 + 
self.conv_groups = 1 + self.conv_padding = 'SAME' + self.use_cudnn = True + + +class QuantDequantTensorRTSubgraphPassDWConvTest( + QuantDequantTensorRTSubgraphPassConvTest): + def set_params(self): + self.conv_num_filters = 64 + self.conv_filter_size = 4 + self.conv_groups = 4 + self.conv_padding = [1, 1] + self.use_cudnn = True + + +class DynamicShapeQuantDequantTensorRTSubgraphPassConvTest(QuantDequantTest): + def setUp(self): + self.set_params() + + def network(): + self.data = fluid.data( + name='data', shape=[1, 28, 28], dtype='float32') + data_reshape = fluid.layers.reshape(self.data, shape=[1, 4, 14, 14]) + self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') + label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1]) + conv_out = fluid.layers.conv2d( + input=data_reshape, + num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + groups=self.conv_groups, + padding=self.conv_padding, + bias_attr=False, + use_cudnn=self.use_cudnn, + act=None) + cout = fluid.layers.reshape(conv_out, shape=[1, 1, 10816]) + result = fluid.layers.relu(cout) + loss = fluid.layers.cross_entropy(input=result, label=label_shape) + avg_loss = fluid.layers.mean(loss) + return avg_loss, result + + self.main_program.random_seed = 2 + self.startup_program.random_seed = 2 + self.test_main_program.random_seed = 2 + #self.test_startup_program.random_seed = 2 + with fluid.unique_name.guard(): + with fluid.program_guard(self.main_program, self.startup_program): + self.loss, result = network() + opt = fluid.optimizer.Adam(learning_rate=0.0001) + opt.minimize(self.loss) + with fluid.unique_name.guard(): + with fluid.program_guard(self.test_main_program, + self.startup_program): + network() + self.feeds = {"data": np.random.random([1, 28, 28]).astype("float32")} + self.fetch_list = [result] + self.enable_trt = True + self.trt_parameters = DynamicShapeQuantDequantTensorRTSubgraphPassConvTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, 
False) + self.dynamic_shape_params = DynamicShapeQuantDequantTensorRTSubgraphPassConvTest.DynamicShapeParam( + { + "conv2d_0.tmp_0": [1, 4, 14, 14], + "data": [1, 28, 28], + "depthwise_conv2d_0.tmp_0": [1, 4, 14, 14], + "reshape2_0.tmp_0": [1, 4, 14, 14], + "reshape2_2.tmp_0": [1, 1, 10816] + }, { + "conv2d_0.tmp_0": [4, 4, 14, 14], + "data": [4, 28, 28], + "depthwise_conv2d_0.tmp_0": [4, 4, 14, 14], + "reshape2_0.tmp_0": [4, 4, 14, 14], + "reshape2_2.tmp_0": [1, 1, 43264] + }, { + "conv2d_0.tmp_0": [1, 4, 14, 14], + "data": [1, 28, 28], + "depthwise_conv2d_0.tmp_0": [1, 4, 14, 14], + "reshape2_0.tmp_0": [1, 4, 14, 14], + "reshape2_2.tmp_0": [1, 1, 10816] + }, False) + self.activation_quantize_type = 'moving_average_abs_max' + self.weight_quantize_type = 'channel_wise_abs_max' + + def set_params(self): + self.conv_num_filters = 64 + self.conv_filter_size = 4 + self.conv_groups = 1 + self.conv_padding = [1, 1] + self.use_cudnn = True + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option( + use_gpu, atol=1e-1, flatten=False, rtol=1e-1) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class QuantDequantTensorRTSubgraphPassConvTransposeTest(QuantDequantTest): + def setUp(self): + self.set_params() + + def network(): + self.data = fluid.data( + name='data', shape=[1, 28, 28], dtype='float32') + data_reshape = fluid.layers.reshape(self.data, shape=[1, 4, 14, 14]) + self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') + label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1]) + conv_out = fluid.layers.conv2d_transpose( + input=data_reshape, + num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + groups=self.conv_groups, + padding=self.conv_padding, + bias_attr=False, + use_cudnn=self.use_cudnn, + act=None) + if self.conv_padding == [1, 1]: + cout = fluid.layers.reshape(conv_out, shape=[1, 1, 14400]) + elif self.conv_padding == 
'VALID': + cout = fluid.layers.reshape(conv_out, shape=[1, 1, 18496]) + elif self.conv_padding == 'SAME': + cout = fluid.layers.reshape(conv_out, shape=[1, 1, 12544]) + elif self.conv_groups == 4: + cout = fluid.layers.reshape(conv_out, shape=[1, 1, 10816]) + result = fluid.layers.relu(cout) + loss = fluid.layers.cross_entropy(input=result, label=label_shape) + avg_loss = fluid.layers.mean(loss) + return avg_loss, result + + self.main_program.random_seed = 2 + self.startup_program.random_seed = 2 + self.test_main_program.random_seed = 2 + #self.test_startup_program.random_seed = 2 + with fluid.unique_name.guard(): + with fluid.program_guard(self.main_program, self.startup_program): + self.loss, result = network() + opt = fluid.optimizer.Adam(learning_rate=0.0001) + opt.minimize(self.loss) + with fluid.unique_name.guard(): + with fluid.program_guard(self.test_main_program, + self.startup_program): + network() + self.feeds = {"data": np.random.random([1, 28, 28]).astype("float32")} + self.fetch_list = [result] + self.enable_trt = True + self.trt_parameters = QuantDequantTensorRTSubgraphPassConvTransposeTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False) + self.activation_quantize_type = 'moving_average_abs_max' + self.weight_quantize_type = 'channel_wise_abs_max' + + def set_params(self): + self.conv_num_filters = 64 + self.conv_filter_size = 4 + self.conv_groups = 1 + self.conv_padding = [1, 1] + self.use_cudnn = True + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option( + use_gpu, atol=1e-1, flatten=False, rtol=1e-1) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class QuantDequantTensorRTSubgraphPassConvTransValidPaddingTest( + QuantDequantTensorRTSubgraphPassConvTransposeTest): + def set_params(self): + self.conv_num_filters = 64 + self.conv_filter_size = 4 + self.conv_groups = 1 + self.conv_padding = 'VALID' + self.use_cudnn = True 
+ + +class QuantDequantTensorRTSubgraphPassConvTransSamePaddingTest( + QuantDequantTensorRTSubgraphPassConvTransposeTest): + def set_params(self): + self.conv_num_filters = 64 + self.conv_filter_size = 4 + self.conv_groups = 1 + self.conv_padding = 'SAME' + self.use_cudnn = True + + +class QuantDequantTensorRTSubgraphPassTransDWConvTest( + QuantDequantTensorRTSubgraphPassConvTransposeTest): + def set_params(self): + self.conv_num_filters = 64 + self.conv_filter_size = 4 + self.conv_groups = 4 + self.conv_padding = [1, 1] + self.use_cudnn = True + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py index 48706bf5ad1fd985dfc3191286370983d0820730..dd6232fac459e480e884b4662959ccce5f30d1cc 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py @@ -17,9 +17,11 @@ from __future__ import print_function import unittest import numpy as np from inference_pass_test import InferencePassTest +from quant_dequant_test import QuantDequantTest import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker class FCFusePassTRTTest(InferencePassTest): @@ -31,10 +33,7 @@ class FCFusePassTRTTest(InferencePassTest): size=128, num_flatten_dims=1, act="relu") - fc_out2 = fluid.layers.fc(input=fc_out1, - size=32, - num_flatten_dims=1) - out = fluid.layers.softmax(input=fc_out2) + out = fluid.layers.softmax(input=fc_out1) self.feeds = { "data": np.random.random((32, 128, 2, 2)).astype("float32") @@ -55,5 +54,236 @@ class FCFusePassTRTTest(InferencePassTest): self.check_output_with_option(use_gpu[i]) +class FCFusePassTRTStaticDims4Cols1Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, 
self.startup_program): + data = fluid.data( + name="data", shape=[32, 128, 32, 8], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=1, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = { + "data": np.random.random((32, 128, 32, 8)).astype("float32") + } + self.enable_trt = True + self.trt_parameters = FCFusePassTRTStaticDims4Cols1Test.TensorRTParam( + 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +class FCFusePassTRTStaticDims4Cols2Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[3, 24, 16, 16], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=32, + num_flatten_dims=2, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = { + "data": np.random.random((3, 24, 16, 16)).astype("float32") + } + self.enable_trt = True + self.trt_parameters = FCFusePassTRTStaticDims4Cols2Test.TensorRTParam( + 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +class FCFusePassTRTDynamicDims2Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[32, 128], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=1, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = {"data": np.random.random((32, 128)).astype("float32")} + self.enable_trt = True + self.trt_parameters = 
FCFusePassTRTDynamicDims2Test.TensorRTParam( + 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = FCFusePassTRTDynamicDims2Test.DynamicShapeParam( + { + 'data': [1, 128] + }, {'data': [64, 128]}, {'data': [32, 128]}, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=1, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")} + self.enable_trt = True + self.trt_parameters = FCFusePassTRTDynamicDims3Cols1Test.TensorRTParam( + 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols1Test.DynamicShapeParam( + { + 'data': [1, 128, 32] + }, {'data': [64, 128, 32]}, {'data': [32, 128, 32]}, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=2, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")} + self.enable_trt = True + self.trt_parameters = FCFusePassTRTDynamicDims3Cols2Test.TensorRTParam( + 1 
<< 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols2Test.DynamicShapeParam( + { + 'data': [1, 32, 32] + }, {'data': [64, 256, 32]}, {'data': [32, 128, 32]}, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[32, 12, 4, 6], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=1, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = { + "data": np.random.random((32, 12, 4, 6)).astype("float32") + } + self.enable_trt = True + self.trt_parameters = FCFusePassTRTDynamicDims4Cols1Test.TensorRTParam( + 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols1Test.DynamicShapeParam( + { + 'data': [1, 12, 4, 6] + }, {'data': [64, 12, 4, 6]}, {'data': [32, 12, 4, 6]}, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[32, 128, 32, 32], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=2, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = { + "data": np.random.random((32, 128, 32, 32)).astype("float32") + } + self.enable_trt = True + self.trt_parameters = FCFusePassTRTDynamicDims4Cols2Test.TensorRTParam( + 1 
<< 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols2Test.DynamicShapeParam( + { + 'data': [1, 64, 32, 32] + }, {'data': [64, 256, 32, 32]}, {'data': [32, 128, 32, 32]}, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[32, 128, 32, 32], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=3, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = { + "data": np.random.random((32, 128, 32, 32)).astype("float32") + } + self.enable_trt = True + self.trt_parameters = FCFusePassTRTDynamicDims4Cols3Test.TensorRTParam( + 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols3Test.DynamicShapeParam( + { + 'data': [1, 128, 32, 32] + }, {'data': [64, 128, 32, 32]}, {'data': [32, 128, 32, 32]}, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..114fa6478f8a6f2985be92dd13a3c4731bb207c3 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py @@ -0,0 +1,207 @@ +# Copyright (c) 2020 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +from quant_dequant_test import QuantDequantTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker + + +class FCQuantDequantFusePassTRTDims3Cols1Test(QuantDequantTest): + def setUp(self): + def network(): + self.data = fluid.data( + name='data', shape=[1, 28, 28], dtype='float32') + self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') + fc_out = fluid.layers.fc(input=self.data, + size=10, + num_flatten_dims=1, + bias_attr=False, + act="relu") + result = fluid.layers.relu(fc_out) + loss = fluid.layers.cross_entropy(input=result, label=self.label) + avg_loss = fluid.layers.mean(loss) + return avg_loss, result + + self.main_program.random_seed = 2 + self.startup_program.random_seed = 2 + self.test_main_program.random_seed = 2 + #self.test_startup_program.random_seed = 2 + with fluid.unique_name.guard(): + with fluid.program_guard(self.main_program, self.startup_program): + self.loss, result = network() + opt = fluid.optimizer.Adam(learning_rate=0.0001) + opt.minimize(self.loss) + with fluid.unique_name.guard(): + with fluid.program_guard(self.test_main_program, + self.startup_program): + network() + self.feeds = {"data": np.random.random((1, 28, 
28)).astype("float32")} + self.fetch_list = [result] + self.enable_trt = True + self.trt_parameters = FCQuantDequantFusePassTRTDims3Cols1Test.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False) + self.dynamic_shape_params = FCQuantDequantFusePassTRTDims3Cols1Test.DynamicShapeParam( + { + 'data': [1, 28, 28], + 'reshape2_1.tmp_0': [1, 1, 10] + }, {'data': [2, 28, 28], + 'reshape2_1.tmp_0': [2, 1, 10]}, + {'data': [1, 28, 28], + 'reshape2_1.tmp_0': [1, 1, 10]}, False) + self.activation_quantize_type = 'moving_average_abs_max' + self.weight_quantize_type = 'channel_wise_abs_max' + + def test_check_output(self): + #self.quant_dequant() + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option( + use_gpu, atol=1e-2, flatten=False, rtol=1e-2) + self.assertTrue( + PassVersionChecker.IsCompatible( + 'quant_conv2d_dequant_fuse_pass')) + + +class FCQuantDequantFusePassTRTDims3Cols2Test(QuantDequantTest): + def setUp(self): + def network(): + self.data = fluid.data( + name='data', shape=[1, 28, 28], dtype='float32') + self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') + label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1]) + fc_out = fluid.layers.fc(input=self.data, + size=28, + num_flatten_dims=2, + bias_attr=False, + act=None) + c_out = fluid.layers.reshape(fc_out, shape=[1, 1, 784]) + result = fluid.layers.relu(c_out) + loss = fluid.layers.cross_entropy(input=result, label=label_shape) + avg_loss = fluid.layers.mean(loss) + return avg_loss, result + + self.main_program.random_seed = 2 + self.startup_program.random_seed = 2 + self.test_main_program.random_seed = 2 + #self.test_startup_program.random_seed = 2 + with fluid.unique_name.guard(): + with fluid.program_guard(self.main_program, self.startup_program): + self.loss, result = network() + opt = fluid.optimizer.Adam(learning_rate=0.0001) + opt.minimize(self.loss) + with fluid.unique_name.guard(): + with 
fluid.program_guard(self.test_main_program, + self.startup_program): + network() + self.feeds = {"data": np.random.random((1, 28, 28)).astype("float32")} + self.fetch_list = [result] + self.enable_trt = True + self.trt_parameters = FCQuantDequantFusePassTRTDims3Cols2Test.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False) + self.dynamic_shape_params = FCQuantDequantFusePassTRTDims3Cols2Test.DynamicShapeParam( + { + 'data': [1, 28, 28], + 'reshape2_1.tmp_0': [1, 1, 784] + }, {'data': [4, 28, 28], + 'reshape2_1.tmp_0': [4, 1, 784]}, + {'data': [1, 28, 28], + 'reshape2_1.tmp_0': [1, 1, 784]}, False) + self.activation_quantize_type = 'moving_average_abs_max' + self.weight_quantize_type = 'channel_wise_abs_max' + + def test_check_output(self): + #self.quant_dequant() + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option( + use_gpu, atol=1e-1, flatten=False, rtol=1e-1) + self.assertTrue( + PassVersionChecker.IsCompatible( + 'quant_conv2d_dequant_fuse_pass')) + + +class FCQuantDequantFusePassTRTDims3Cols3Test(QuantDequantTest): + def setUp(self): + def network(): + self.data = fluid.data( + name='data', shape=[1, 28, 28], dtype='float32') + self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') + label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1]) + reshape_out = fluid.layers.reshape(self.data, shape=[1, 14, 14, 4]) + fc_out = fluid.layers.fc(input=reshape_out, + size=14, + num_flatten_dims=3, + bias_attr=False, + act=None) + c_out = fluid.layers.reshape(fc_out, shape=[1, 1, 2744]) + result = fluid.layers.relu(c_out) + loss = fluid.layers.cross_entropy(input=result, label=label_shape) + avg_loss = fluid.layers.mean(loss) + return avg_loss, result + + self.main_program.random_seed = 2 + self.startup_program.random_seed = 2 + self.test_main_program.random_seed = 2 + #self.test_startup_program.random_seed = 2 + with fluid.unique_name.guard(): + with fluid.program_guard(self.main_program, 
self.startup_program): + self.loss, result = network() + opt = fluid.optimizer.Adam(learning_rate=0.0001) + opt.minimize(self.loss) + with fluid.unique_name.guard(): + with fluid.program_guard(self.test_main_program, + self.startup_program): + network() + self.feeds = {"data": np.random.random((1, 28, 28)).astype("float32")} + self.fetch_list = [result] + self.enable_trt = True + self.trt_parameters = FCQuantDequantFusePassTRTDims3Cols3Test.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False) + self.dynamic_shape_params = FCQuantDequantFusePassTRTDims3Cols3Test.DynamicShapeParam( + { + 'data': [1, 28, 28], + "reshape2_1.tmp_0": [1, 14, 14, 4], + "reshape2_2.tmp_0": [1, 1, 2744] + }, { + 'data': [4, 28, 28], + "reshape2_1.tmp_0": [4, 14, 14, 4], + "reshape2_2.tmp_0": [4, 1, 2744] + }, { + 'data': [1, 28, 28], + "reshape2_1.tmp_0": [1, 14, 14, 4], + "reshape2_2.tmp_0": [1, 1, 2744] + }, False) + self.activation_quantize_type = 'moving_average_abs_max' + self.weight_quantize_type = 'channel_wise_abs_max' + + def test_check_output(self): + #self.quant_dequant() + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option( + use_gpu, atol=1e0, flatten=False, rtol=1e0) + self.assertTrue( + PassVersionChecker.IsCompatible( + 'quant_conv2d_dequant_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py new file mode 100644 index 0000000000000000000000000000000000000000..86cc15a713e145e16bf973b870a4d858cb257c37 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py @@ -0,0 +1,204 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
from inference_pass_test import InferencePassTest
from quant_dequant_test import QuantDequantTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig


def _build_train_and_test_programs(case, network):
    """Build the training and test programs of ``case`` from ``network``.

    Seeds ``main_program``, ``startup_program`` and ``test_main_program``
    with 2, builds ``network`` once under (main_program, startup_program)
    and attaches an Adam optimizer to the returned loss, then builds it a
    second time under (test_main_program, startup_program).

    Args:
        case: test-case instance exposing ``main_program``,
            ``startup_program`` and ``test_main_program``; its ``loss``
            attribute is assigned here.
        network: zero-argument callable returning ``(avg_loss, result)``.

    Returns:
        The ``result`` variable produced by the first (training) build.
    """
    # NOTE(review): test_startup_program is neither seeded nor used; both
    # builds share startup_program -- confirm this is intended.
    for program in (case.main_program, case.startup_program,
                    case.test_main_program):
        program.random_seed = 2

    with fluid.unique_name.guard():
        with fluid.program_guard(case.main_program, case.startup_program):
            case.loss, prediction = network()
            optimizer = fluid.optimizer.Adam(learning_rate=0.0001)
            optimizer.minimize(case.loss)

    # A fresh unique_name guard is opened for the second build, as for the
    # first one.
    with fluid.unique_name.guard():
        with fluid.program_guard(case.test_main_program,
                                 case.startup_program):
            network()

    return prediction


class TensorRTMatMulQuantDequantDims3Test(QuantDequantTest):
    """matmul -> fc -> relu network on a [1, 28, 28] input, TensorRT Int8.

    Subclasses override ``set_params`` to vary the matmul attributes.
    """

    def setUp(self):
        """Build the programs, the random feed and the TensorRT settings."""
        self.set_params()
        prediction = _build_train_and_test_programs(self, self._network)

        self.feeds = {"data": np.random.random([1, 28, 28]).astype("float32")}
        self.fetch_list = [prediction]
        self.enable_trt = True
        # NOTE(review): positional arguments are presumably workspace size,
        # max batch size, min subgraph size, precision, use_static and
        # use_calib_mode -- confirm against TensorRTParam.
        self.trt_parameters = TensorRTMatMulQuantDequantDims3Test.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False)
        self.activation_quantize_type = 'moving_average_abs_max'
        self.weight_quantize_type = 'channel_wise_abs_max'

    def _network(self):
        """Declare the 3-D matmul network; return ``(avg_loss, result)``."""
        self.data = fluid.data(
            name='data', shape=[1, 28, 28], dtype='float32')
        self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
        # The input is multiplied with itself.
        product = fluid.layers.matmul(
            x=self.data,
            y=self.data,
            transpose_x=self.transpose_x,
            transpose_y=self.transpose_y,
            alpha=self.alpha)
        logits = fluid.layers.fc(input=product,
                                 size=10,
                                 num_flatten_dims=1,
                                 bias_attr=False,
                                 act=None)
        activated = fluid.layers.relu(logits)
        entropy = fluid.layers.cross_entropy(
            input=activated, label=self.label)
        return fluid.layers.mean(entropy), activated

    def set_params(self):
        """Default matmul attributes: no transposes, alpha of 1.0."""
        self.transpose_x, self.transpose_y, self.alpha = False, False, 1.0

    def test_check_output(self):
        """Check outputs on GPU; does nothing on a build without CUDA."""
        # NOTE(review): quant_dequant() is not called explicitly here;
        # presumably check_output_with_option takes care of it -- confirm.
        if not core.is_compiled_with_cuda():
            return
        self.check_output_with_option(
            True, atol=1e-1, flatten=False, rtol=1e-1)
        self.assertTrue(
            PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))


class TensorRTMatMulQuantDequantDims3TransposeXTest(
        TensorRTMatMulQuantDequantDims3Test):
    """3-D variant with ``transpose_x`` enabled."""

    def set_params(self):
        self.transpose_x, self.transpose_y, self.alpha = True, False, 1.0


class TensorRTMatMulQuantDequantDims3TransposeYTest(
        TensorRTMatMulQuantDequantDims3Test):
    """3-D variant with ``transpose_y`` enabled."""

    def set_params(self):
        self.transpose_x, self.transpose_y, self.alpha = False, True, 1.0


class TensorRTMatMulQuantDequantDims3TransposeXYTest(
        TensorRTMatMulQuantDequantDims3Test):
    """3-D variant with both transposes enabled."""

    def set_params(self):
        self.transpose_x, self.transpose_y, self.alpha = True, True, 1.0


class TensorRTMatMulQuantDequantDims4Test(QuantDequantTest):
    """Same scenario with the input reshaped to [1, 4, 14, 14] before matmul.

    Subclasses override ``set_params`` to vary the matmul attributes.
    """

    def setUp(self):
        """Build the programs, the random feed and the TensorRT settings."""
        self.set_params()
        prediction = _build_train_and_test_programs(self, self._network)

        self.feeds = {"data": np.random.random([1, 28, 28]).astype("float32")}
        self.fetch_list = [prediction]
        self.enable_trt = True
        # NOTE(review): positional arguments are presumably workspace size,
        # max batch size, min subgraph size, precision, use_static and
        # use_calib_mode -- confirm against TensorRTParam.
        self.trt_parameters = TensorRTMatMulQuantDequantDims4Test.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False)
        self.activation_quantize_type = 'moving_average_abs_max'
        self.weight_quantize_type = 'channel_wise_abs_max'

    def _network(self):
        """Declare the 4-D matmul network; return ``(avg_loss, result)``."""
        self.data = fluid.data(
            name='data', shape=[1, 28, 28], dtype='float32')
        self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
        reshaped = fluid.layers.reshape(self.data, shape=[1, 4, 14, 14])
        product = fluid.layers.matmul(
            x=reshaped,
            y=reshaped,
            transpose_x=self.transpose_x,
            transpose_y=self.transpose_y,
            alpha=self.alpha)
        # NOTE(review): the batch_norm output is not consumed -- the fc layer
        # below reads the matmul output directly. The op is still added to
        # the program; confirm whether fc was meant to take it as input.
        fluid.layers.batch_norm(product, is_test=True)
        logits = fluid.layers.fc(input=product,
                                 size=10,
                                 num_flatten_dims=1,
                                 bias_attr=False,
                                 act=None)
        activated = fluid.layers.relu(logits)
        entropy = fluid.layers.cross_entropy(
            input=activated, label=self.label)
        return fluid.layers.mean(entropy), activated

    def set_params(self):
        """Default matmul attributes: no transposes, alpha of 1.0."""
        self.transpose_x, self.transpose_y, self.alpha = False, False, 1.0

    def test_check_output(self):
        """Check outputs on GPU; does nothing on a build without CUDA."""
        # NOTE(review): quant_dequant() is not called explicitly here;
        # presumably check_output_with_option takes care of it -- confirm.
        if not core.is_compiled_with_cuda():
            return
        self.check_output_with_option(
            True, atol=1e-1, flatten=False, rtol=1e-1)
        self.assertTrue(
            PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))


class TensorRTMatMulQuantDequantDims4TransposeXTest(
        TensorRTMatMulQuantDequantDims4Test):
    """4-D variant with ``transpose_x`` enabled."""

    def set_params(self):
        self.transpose_x, self.transpose_y, self.alpha = True, False, 1.0


class TensorRTMatMulQuantDequantDims4TransposeYTest(
        TensorRTMatMulQuantDequantDims4Test):
    """4-D variant with ``transpose_y`` enabled."""

    def set_params(self):
        self.transpose_x, self.transpose_y, self.alpha = False, True, 1.0


class TensorRTMatMulQuantDequantDims4TransposeXYTest(
        TensorRTMatMulQuantDequantDims4Test):
    """4-D variant with both transposes enabled."""

    def set_params(self):
        self.transpose_x, self.transpose_y, self.alpha = True, True, 1.0


class TensorRTMatMulQuantDequantDims4ScaleTest(
        TensorRTMatMulQuantDequantDims4Test):
    """4-D variant without transposes and with ``alpha`` set to 2.0."""

    def set_params(self):
        self.transpose_x, self.transpose_y, self.alpha = False, False, 2.0


if __name__ == "__main__":
    unittest.main()