bgawrych opened a new pull request #20533:
URL: https://github.com/apache/incubator-mxnet/pull/20533


   ## Description ##
   This PR is a continuation of 
https://github.com/apache/incubator-mxnet/pull/20450, where enabling BRGEMM 
automatically was requested. 
   
   
   The following script was used for benchmarking. Only a single iteration 
was measured for each configuration, to avoid measuring performance with 
cached reordered weights for BRGEMM.
   
   ```
   import mxnet as mx
   from mxnet import nd
   from mxnet.gluon import nn
   import time
   
   class CalibIter(mx.io.DataIter):
       """Calibration iterator that serves one pre-built batch.

       ``provide_data`` becomes a single ``('data', data_shape)`` entry when
       ``data_shape`` is a tuple; otherwise ``data_shape`` is stored as given.
       """

       def __init__(self, batch, data_shape, batch_size):
           super().__init__(batch_size)
           self.batch = batch
           self.data_shape = data_shape
           self.label_shape = (batch_size,)
           # No label descriptors are provided by this iterator.
           self.provide_label = []
           self.provide_data = (
               [('data', data_shape)]
               if isinstance(data_shape, tuple)
               else data_shape
           )

       def __iter__(self):
           """Yield the stored batch exactly once."""
           yield self.batch
   
   def test_qfc():
       """Benchmark a quantized Dense layer over a grid of sizes.

       For every (ic, oc) pair a ``nn.Dense`` net is created, run once in
       float, quantized with ``quantize_net_v2`` (naive calibration on one
       example), and then executed a single time for each batch size in ``N``.
       Each forward pass is timed individually and the results are printed as
       ``batch_size;input_channels;output_channels;seconds``.
       """
       results = dict()
       N  = [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
             4096, 8192, 9216, 10240, 11264, 12288, 13312,
             14336, 15360, 16384, 32768, 65536, 131072]
       IC = [32, 64, 128, 256, 512, 1024, 2048, 4096]
       OC = [32, 64, 128, 256, 512, 1024, 2048, 4096]

       for ic in IC:
           for oc in OC:
               net = nn.Dense(units=oc, flatten=True,
                              weight_initializer=mx.init.Normal(),
                              bias_initializer=mx.init.Normal())
               net.initialize()
               net.hybridize(static_alloc=True, static_shape=True)
               x = mx.nd.random_uniform(shape=(1, ic), low=-1.0, high=1.0)
               # Run the float net once before quantizing it.
               net(x)
               batch = mx.io.DataBatch([x])
               data_desc = mx.io.DataDesc("data", shape=(1, ic), dtype='float32')
               calib_data = CalibIter(batch, [data_desc], 1)
               net_quantized = mx.contrib.quant.quantize_net_v2(
                   net,
                   quantized_dtype='auto',
                   exclude_layers=None,
                   exclude_layers_match=None,
                   calib_data=calib_data,
                   calib_mode='naive',
                   num_calib_examples=1,
                   ctx=mx.current_context())
               net_quantized.hybridize(static_alloc=True, static_shape=True)
               mx.nd.waitall()
               for bs in N:
                   x = mx.nd.random_uniform(shape=(bs, ic), low=-1.0, high=1.0)
                   # Wait for the input to exist so only the forward pass is timed.
                   mx.nd.waitall()
                   # perf_counter() is monotonic and high-resolution, which
                   # time.time() does not guarantee for short intervals.
                   tic = time.perf_counter()
                   o = net_quantized(x)
                   o.wait_to_read()
                   results[(bs, ic, oc)] = time.perf_counter() - tic

       for (bs, ic, oc), elapsed in results.items():
           print(f"{bs};{ic};{oc};{elapsed}")

   test_qfc()
   ```
   Attached is a spreadsheet with the results on CLX8280 (before this change). 
   
[brgemm_igemm_cmp.xlsx](https://github.com/apache/incubator-mxnet/files/7000191/brgemm_igemm_cmp.xlsx)
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to