Hey all,
I am trying to run a benchmark in order to compare our GPU cluster and our supercomputer. The GPU cluster has one Nvidia GTX680 per node
and 10G Ethernet, while the supercomputer has no GPUs but uses InfiniBand.
In short, I open a file with 31 million triangles, run D3() to divide it over the processes, and render it to a 1600x1200 render view, using a Python
script (attached) and pvpython (ParaView 3.98.1).
However, I am running into some unexpected deviations in my frame render times on the GPU cluster when using multiple (8+) nodes. Most frames render
in a constant time, but every so many frames I get one that is 2 to 3 times as slow. The output of the benchmark module doesn't shed much light on the
subject: all processes report the same, much higher frame time for these frames, so it is not clear whether one process is slowing the whole run down.
Our initial hunch was that it might have to do with the compositing step: Since the infiniband is much faster in terms of latency and throughput, this
step might be more efficient on the Supercomputer.
My question is, is there a way to separate the benchmark times of the rendering
and the compositing part of each frame?
Or is there any other advice on pinpointing the performance culprit?
regards,
Tijs de Kler
SURFsara Visualization
from paraview.simple import *
paraview.simple._DisableFirstRenderCameraReset()
import time
import math
import os
import subprocess
from optparse import OptionParser
import paraview.benchmark
paraview.benchmark.maximize_logs()
# Command-line interface of the benchmark driver.
#   -n  number of nodes        (required: used for the colour-map range and file names)
#   -p  processes per node     (default 1)
#   -s  host of the pvserver   (default "v42-7")
#   -f  prefix of the result files (default "paraview")
#   -q  batch job number       (required: passed to qstat to find the job's hosts)
parser = OptionParser()
parser.add_option("-n", dest="num_nodes", help="number of nodes")
parser.add_option("-p", dest="ppn", help="processes per node", default=1)
parser.add_option("-s", dest="server", help="paraview server", default="v42-7")
parser.add_option("-f", dest="prefix", help="benchmark filename prefix", default="paraview")
parser.add_option("-q", dest="qjob", help="job number")
(options, args) = parser.parse_args()

# -n and -q have no default but are used unconditionally further down
# (int(options.num_nodes) and the qstat argument list).  Fail here with a
# usage message instead of with a TypeError after the connection is made.
if options.num_nodes is None:
    parser.error("-n (number of nodes) is required")
if options.qjob is None:
    parser.error("-q (job number) is required")

# Attach this client to the running server.
Connect(options.server)
#Connect("v42-7")
print("test1")
# Reference point for the per-stage timings written to the results file.
oldTime = time.time()

# Ask the batch system which hosts run the job.  The output of `qstat -x` is
# searched for the <exec_host> element, which is expected to hold
# '+'-separated "host/slot" entries.
process = subprocess.Popen(['qstat', '-x', options.qjob], stdout=subprocess.PIPE)
out, err = process.communicate()
#print out
if '<exec_host>' not in out:
    # Without this check the split below dies with a bare IndexError.
    raise RuntimeError("qstat reported no <exec_host> for job %s" % options.qjob)
execHost = out.split('<exec_host>')[1].split('</exec_host>')[0]

# Keep only the host part of every entry and drop duplicates.  Cutting at the
# '/' works for any slot number; stripping the literal suffixes '/0'..'/3'
# left '/4' and higher attached and turned e.g. '/12' into '2'.
nodeList = list(set(entry.split('/')[0] for entry in execHost.split('+')))
print(nodeList)
#nodeList = ['v41-7', 'v42-7', 'v42-5', 'v42-3','v42-2', 'v42-1', 'v41-8', 'v41-1']
# Network interface whose byte counters are read on every node.
interfaceList = ['vlan215']

# Results are written to "<prefix>_<num_nodes>_<ppn>.txt" in the working directory.
filename = "%s_%s_%s.txt" % (options.prefix, options.num_nodes, options.ppn)
f = open(filename, 'w')

# Each stage logs the time elapsed since the previous checkpoint.
newTime = time.time()
f.write("start perf; %s\n" % (newTime - oldTime))
oldTime = newTime

# Create the reader for the partitioned polydata set.
poging_pvtp = XMLPartitionedPolydataReader(FileName=['/home/tijs/data/poging.pvtp'])

newTime = time.time()
f.write("end loading data; %s\n" % (newTime - oldTime))
oldTime = newTime
# Build the pipeline and the view that is benchmarked, render once, and log
# how long this took.
# D3 is applied to the reader created above; the accompanying message
# describes it as dividing the data over the processes.
D31 = D3()
# Fixed camera and view size so that every run renders the same picture.
RenderView1 = GetRenderView()
RenderView1.CenterOfRotation = [255.5, 255.5, 255.5]
#Contour1 = Contour( PointMergeMethod="Uniform Binning" )
RenderView1.CameraPosition = [255.5, 255.5, 1965.3393248400794]
# presumably switches depth peeling off — confirm against the ParaView docs
RenderView1.DepthPeeling = 0
RenderView1.CameraFocalPoint = [255.5, 255.5, 255.5]
RenderView1.CameraClippingRange = [1184.2959315916785, 2375.5144147126807]
RenderView1.CameraParallelScale = 442.53898133384814
# Render target size in pixels; the alternatives are kept for other test runs.
#RenderView1.ViewSize=[6400,4800]
RenderView1.ViewSize=[1600,1200]
#RenderView1.ViewSize=[160,120]
#Contour1.PointMergeMethod = "Uniform Binning"
#Contour1.ContourBy = ['POINTS', 'scalars']
#Contour1.Isosurfaces = [0.2, 1.2, 2.2, 3.2, 4.2]
#DataRepresentation2 = Show()
#DataRepresentation2.ScaleFactor = 1.1
#DataRepresentation2.SelectionPointFieldDataArrayName = 'vtkProcessId'
#DataRepresentation2.EdgeColor = [0.0, 0.0, 0.5000076295109483]
# Colour the geometry by the 'ProcessId' array so the distribution is visible.
ProcessIdScalars2 = ProcessIdScalars()
# Lookup table for 'ProcessId' with control points at 0 and at ppn * num_nodes
# (the total process count).  RGBPoints is presumably a flat list of
# (value, r, g, b) entries, i.e. blue at 0 and yellow at the top — TODO confirm.
a1_ProcessId_PVLookupTable = GetLookupTableForArray( "ProcessId", 1, RGBPoints=[0.0, 0.0, 0.0, 1.0, float(int(options.ppn)*int(options.num_nodes)), 1.0, 1.0, 0.0], VectorMode='Magnitude', NanColor=[0.25, 0.0, 0.0], ColorSpace='Diverging', ScalarRangeInitialized=1.0, AllowDuplicateScalars=1 )
#a1_ProcessId_PiecewiseFunction = CreatePiecewiseFunction( Points=[0.0, 0.0, 0.5, 0.0, 16.0, 1.0, 0.5, 0.0] )
DataRepresentation4 = Show()
DataRepresentation4.EdgeColor = [0.0, 0.0, 0.5000076295109483]
DataRepresentation4.SelectionPointFieldDataArrayName = 'ProcessId'
DataRepresentation4.ColorArrayName = 'ProcessId'
DataRepresentation4.LookupTable = a1_ProcessId_PVLookupTable
DataRepresentation4.ScaleFactor = 51.1
#a1_ProcessId_PVLookupTable.ScalarOpacityFunction = a1_ProcessId_PiecewiseFunction
# First render of the scene; its cost is part of the "callib" time below.
Render()
# Record the polygon count reported for the shown representation.
f.write("Number of polygons: " +str(DataRepresentation4.SMProxy.GetRepresentedDataInformation().GetPolygonCount()) +"\n")
#f.write("Number of polygons: " +str(ProcessIdScalars2.SMProxy.GetRepresentedDataInformation().GetPolygonCount()) +"\n")
#writer= CreateWriter("/home/tijs/render_benchmark/results/d3_"+str(options.num_nodes)+"_"+str(options.ppn)+".pvtu",D31)
#writer.UpdatePipeline()
cam = GetActiveCamera()
# timeList collects one wall-clock duration per timed frame.
timeList = []
# NOTE(review): SXList is not read anywhere in the visible script.
SXList=[]
# Time since the "end loading data" checkpoint: pipeline setup plus first render.
newTime = time.time()
f.write("callib: " + str(newTime-oldTime) +"\n\n")
oldTime=newTime
# Warm-up: three one-degree camera steps before the timed frames.
# NOTE(review): the indentation of this script was lost in transit. The loop
# body presumably runs from cam.Azimuth(1) through Render() — confirm against
# the original attachment before running.
paraview.benchmark.get_logs()
for i in range(0,3):
cam.Azimuth(1)
paraview.benchmark.get_logs()
Render()
# Restart the stopwatch; the currentTime computed here is overwritten before
# it is read, so the warm-up frames do not end up in the results.
newTime = time.time()
currentTime = newTime-oldTime
oldTime=newTime
def _sample_byte_counters(nodes, interfaces):
    """Return ([RX bytes], [TX bytes]) for the first interface, one entry per node.

    Runs ifconfig over ssh on every node, keeps the line that contains both
    "RX" and "bytes", and takes the token that follows the first and the
    second ':' on it.  The values are kept as strings.
    """
    received = []
    sent = []
    for host in nodes:
        remote = subprocess.Popen(['ssh', host, '-fx', '/sbin/ifconfig ' + interfaces[0] + ' | grep RX|grep bytes'], stdout=subprocess.PIPE)
        text = remote.communicate()[0]
        received.append(text.split(':')[1].split(' ')[0])
        sent.append(text.split(':')[2].split(' ')[0])
    return received, sent


# Byte counters on every node before the timed frames.
startList, TXStartList = _sample_byte_counters(nodeList, interfaceList)

# Restart the stopwatch so the ssh round trips are not charged to the first frame.
newTime = time.time()
currentTime = newTime - oldTime
oldTime = newTime

# Timed section: rotate the camera 8 degrees per frame and record the
# wall-clock time of every Render() call.
numTest = 10
for frame in range(numTest):
    cam.Azimuth(8)
    Render()
    #paraview.benchmark.print_logs()
    newTime = time.time()
    currentTime = newTime - oldTime
    timeList.append(currentTime)
    oldTime = newTime

# Byte counters on every node after the timed frames.
endList, TXEndList = _sample_byte_counters(nodeList, interfaceList)
# Per-frame times and their mean.
totalTime = 0.0
for run in range(numTest):
    elapsed = timeList[run]
    f.write("run %s : %s\n" % (run, elapsed))
    totalTime += elapsed
average = totalTime / float(numTest)
f.write("average: %s\n" % average)

# Population standard deviation (divides by numTest, not numTest - 1).
varTime = 0.0
for run in range(numTest):
    delta = timeList[run] - average
    varTime += delta * delta
deviation = math.sqrt(varTime / float(numTest))
f.write("standard dev : %s\n" % deviation)

# Per-node traffic over the timed frames, echoed to stdout and to the file.
# NOTE(review): the byte delta is divided by 1e7 but labelled "MegaBytes";
# with numTest = 10 this may be meant as MB per frame — confirm the intent.
for host, before, after in zip(nodeList, startList, endList):
    line = host + " " + interfaceList[0] + ": " + str((long(after) - long(before)) / 10000000.0) + " MegaBytes"
    print(line)
    f.write(line + "\n")
for host, before, after in zip(nodeList, TXStartList, TXEndList):
    line = "transfer " + host + " " + interfaceList[0] + ": " + str((long(after) - long(before)) / 10000000.0) + " MegaBytes"
    print(line)
    f.write(line + "\n")
# Move to another viewpoint and save an image of the final state.
cam.Azimuth(60)
cam.Elevation(30)
RenderView1.WriteImage("/home/tijs/render_benchmark/"+ options.prefix+"_"+str(options.num_nodes)+"_"+ str(options.ppn)+".png", "vtkPNGWriter")
# Time since the last timed frame ended.
newTime = time.time()
f.write("end perf test; " + str(newTime-oldTime) +"\n")
oldTime=newTime
# This was the last write to the results file.  Close it explicitly so the
# data is flushed to disk rather than left to interpreter shutdown.
f.close()
# Fetch the ParaView benchmark logs and dump them unprocessed to rawlog.txt.
paraview.benchmark.get_logs()
logname="rawlog.txt"
paraview.benchmark.dump_logs(logname)
_______________________________________________
Powered by www.kitware.com
Visit other Kitware open-source projects at
http://www.kitware.com/opensource/opensource.html
Please keep messages on-topic and check the ParaView Wiki at:
http://paraview.org/Wiki/ParaView
Follow this link to subscribe/unsubscribe:
http://www.paraview.org/mailman/listinfo/paraview