# stable_length_box_thresholds.py
import os, sys, argparse
import numpy
import matplotlib.pyplot as plt
import json
from pylab import *
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.patches import Rectangle
from matplotlib.backends.backend_qt4agg import FigureCanvasQTAgg as FigureCanvas
from sets import Set

from common import *

def _load_frontiers(dir_path, bmark, cache):
	"""Return the parsed per_sample_frontiers.json of `bmark`, reading it at most once.

	`cache` maps benchmark name -> parsed JSON and is filled in on first use,
	so the same file is not re-read for every threshold/budget combination.
	"""
	if bmark not in cache:
		frontiers_file = os.path.join(dir_path, "per_sample_data", bmark, "per_sample_frontiers.json")
		with open(frontiers_file) as handle:
			cache[bmark] = json.load(handle)
	return cache[bmark]


def _stable_region_lengths(samples, budget, budget_slack, cluster_thresh):
	"""Return the lengths (in samples) of the stable regions found in `samples`.

	`samples` is a list with one entry per sample; each entry is a list of
	points (dicts) carrying "inefficiency", "speedup", "cpu_freq" and "mem_freq".
	For each sample, the points within `budget` (plus `budget_slack` percent)
	whose speedup is within `cluster_thresh` percent of the best such point form
	the set of acceptable (cpu_freq, mem_freq) settings.  A region stays stable
	as long as consecutive samples share at least one acceptable setting.

	Raises ValueError when a sample has no point within the budget.
	"""
	per_sample_settings = []
	# enumerate() gives the true sample number; list.index() would return the
	# first *equal* sample and so merge identical neighbouring samples.
	for sample_index, frontier in enumerate(samples):
		within_budget = []
		for point in frontier:
			overshoot = point["inefficiency"] - budget
			if overshoot <= 0 or overshoot * 100.0 / budget <= budget_slack:
				within_budget.append(point)
		if not within_budget:
			raise ValueError("sample %d has no point within inefficiency budget %s" % (sample_index, budget))

		best_speedup = max(point["speedup"] for point in within_budget)

		settings = set()
		for point in within_budget:
			if (best_speedup - point["speedup"]) * 100.0 / best_speedup <= cluster_thresh:
				settings.add((point["cpu_freq"], point["mem_freq"]))
		if settings:
			per_sample_settings.append((sample_index, settings))

	lengths = []
	available = set()	# settings usable throughout the current region
	run_length = 1
	last_position = len(per_sample_settings) - 1
	for position, (sample_index, settings) in enumerate(per_sample_settings):
		shared = settings & available
		# No shared setting means a transition: restart from this sample's settings.
		available = shared if shared else settings

		if not shared or position == last_position:
			# The "transition" into sample 0 is only the start of the trace.
			# NOTE(review): at the end of the trace the final sample is not
			# counted in the recorded length (and a final one-sample region is
			# dropped); kept as-is because it defines the plotted metric.
			if sample_index != 0:
				lengths.append(run_length)
			run_length = 1
		else:
			run_length += 1
	return lengths


def _draw_threshold_boxes(ax, box_data, first_pos, pos_step, width, box_colors):
	"""Draw one filled box per series on `ax`, grouped and coloured by threshold.

	`box_data[t][g]` is the list of lengths for threshold `t` and x-group `g`.
	Boxes of threshold `t` are placed at `g + first_pos + t * pos_step`.
	The series are modified in place (see the jitter below).
	"""
	for threshold_index, series_list in enumerate(box_data):
		positions = [group + first_pos + threshold_index * pos_step for group in range(len(series_list))]

		for series in series_list:
			for idx, value in enumerate(series):
				# Tiny increasing jitter on every value.
				# NOTE(review): presumably avoids degenerate boxes when all
				# lengths are equal -- confirm.
				series[idx] = value + 1e-6 * idx
				if value < 1:
					print(value)
					print("something is wrong!")

		bp = ax.boxplot(series_list, widths=width, positions=positions, whis=float('inf'))
		for box, med, series in zip(bp['boxes'], bp['medians'], series_list):
			# Fill the box using all of its own vertices, independent of how
			# many budgets or benchmarks are being plotted.
			boxCoords = list(zip(box.get_xdata(), box.get_ydata()))
			ax.add_patch(Polygon(boxCoords, facecolor=box_colors[threshold_index]))

			# Now draw the median line back over what we just filled in
			ax.plot(list(med.get_xdata()[:2]), list(med.get_ydata()[:2]), 'k')
			# Finally, overplot the sample average, horizontally centred on the box
			ax.plot([numpy.average(med.get_xdata())], [numpy.average(series)], color='w', marker='*', markeredgecolor='k', markersize=5)

		for box in bp['boxes']:
			box.set(color=box_colors[threshold_index])

		for whisker in bp['whiskers']:
			whisker.set(color='b')

		for flier in bp['fliers']:
			flier.set(color='r', marker='x', markersize=2, markeredgecolor='r')


def plot_stable_length_box(args, threshold_list, budget_list):
	"""Plot box plots of stable-region lengths and save them as a PDF.

	Panels (a) and (b) show, for the benchmarks 'gobmk' and 'bzip2', one group
	of boxes per inefficiency budget in `budget_list`.  Panel (c) shows one
	group per benchmark returned by get_benchmarks(args) at a fixed budget of
	1.3.  Within each group there is one box per entry of `threshold_list`.

	Args:
		args: object providing `input_dir` (root containing
			per_sample_data/<benchmark>/per_sample_frontiers.json) and
			`output_dir`; it is also passed to get_benchmarks().
		threshold_list: cluster thresholds, in percent of the best speedup.
		budget_list: inefficiency budgets for panels (a) and (b).

	The figure is written to <output_dir>/stable_length_box/stable_length_box.pdf;
	the directory is created if it does not exist.

	Raises:
		ValueError: if some sample has no point within the requested budget.
	"""
	dir_path = args.input_dir
	output_dir_path = args.output_dir

	benchmarks = ['gobmk','bzip2']	# you can change benchmarks but should be only 2
	box_colors = ['r','b','y','c','m','g','k','w']
	fig = plt.figure(figsize=(7.5,2.25))

	# Points up to 3% above the target budget/inefficiency are still accepted.
	# Kept under its own name: on Python 2 a list-comprehension variable leaks
	# into the function scope, which previously overwrote this value before
	# panel (c) was computed.
	budget_slack = 3
	frontiers_cache = {}

	for subplot_index, bmark in enumerate(benchmarks):
		samples = _load_frontiers(dir_path, bmark, frontiers_cache)["data"]
		box_data = []
		for cluster_thresh in threshold_list:
			per_budget = []
			for budget in budget_list:
				per_budget.append(_stable_region_lengths(samples, budget, budget_slack, cluster_thresh))
			box_data.append(per_budget)

		ax = fig.add_subplot(1, 3, subplot_index + 1)
		_draw_threshold_boxes(ax, box_data, 0.75, 0.25, 0.2123, box_colors)

		if bmark == 'gobmk':
			ax.set_ylabel("Length of Stable \nRegions (in Samples)")
			ax.text(len(budget_list) // 2 - 0.25, -1.75, "(a) Gobmk", fontsize=text_size)
			ax.set_ylim([0.75, 8.1])
		elif bmark == 'bzip2':
			ax.text(len(budget_list) // 2 - 0.25, -86, "(b) Bzip2", fontsize=text_size)
			ax.set_ylim([-7, 220])

		ax.set_xlim([0, len(budget_list)+1])
		ax.yaxis.grid(True)

		ax.set_xticks(list(range(1, len(budget_list)+1)))
		ax.set_xticklabels(budget_list)

		ax.yaxis.set_ticks_position('left')
		ax.xaxis.set_ticks_position('bottom')

	# plot 3rd subplot: all benchmarks at one fixed budget
	benchmarks, bmark_labels = get_benchmarks(args)
	budget = 1.3

	box_data = []
	for cluster_thresh in threshold_list:
		per_benchmark = []
		for bmark in benchmarks:
			samples = _load_frontiers(dir_path, bmark, frontiers_cache)["data"]
			per_benchmark.append(_stable_region_lengths(samples, budget, budget_slack, cluster_thresh))
		box_data.append(per_benchmark)

	ax = fig.add_subplot(1, 3, 3)
	y_max = 61
	_draw_threshold_boxes(ax, box_data, 0.65, 0.27, 0.2123, box_colors)

	# Get existing artists and labels for the legend; at most the first three are shown
	handles, labels = ax.get_legend_handles_labels()

	# One custom line artist per threshold, in the colour of its boxes
	simArtist = []
	for i in range(len(threshold_list)):
		simArtist.append(plt.Line2D((0,1),(0,0), linestyle='-', linewidth=3.0, color=box_colors[i]))

	# '\\%' is a literal backslash followed by a percent sign
	threshold_labels = [str(cluster_thresh) + '\\%' for cluster_thresh in threshold_list]
	ax.legend(list(handles[:3]) + simArtist,
		list(labels[:3]) + threshold_labels,
		fontsize=legend_size, bbox_to_anchor=(1.02, 1), handlelength=1.0, loc=2, borderaxespad=0., handletextpad=0.2)

	ax.text(len(benchmarks) // 2, -24, "(c)", fontsize=text_size)
	ax.set_xlim([0, len(benchmarks)+1])
	ax.set_ylim([-2, y_max])
	ax.yaxis.grid(True)

	ax.set_xticks(list(range(1, len(benchmarks)+1)))
	print(labels)
	ax.set_xticklabels(bmark_labels, rotation=45)

	ax.yaxis.set_ticks_position('left')
	ax.xaxis.set_ticks_position('bottom')

	plot_dir = os.path.join(output_dir_path, "stable_length_box")
	if not os.path.isdir(plot_dir):
		os.makedirs(plot_dir)
	outputf = os.path.join(plot_dir, "stable_length_box")
	fig.subplots_adjust(top=0.95, right=0.93, left=0.08, bottom=0.40, wspace=0.15)
	plt.savefig('%s.pdf' % (outputf))
	plt.close()

def main(argv):
	"""Parse `argv` and draw the stable-length box plots with the default settings.

	`parse` is not defined in this file; presumably it comes from
	`from common import *` -- verify.
	"""
	cluster_thresholds = [1, 3, 5]
	inefficiency_budgets = [1.0, 1.2, 1.4, 1.6]
	plot_stable_length_box(parse(argv), cluster_thresholds, inefficiency_budgets)

# Run only when executed as a script; the full sys.argv list is handed to main().
if __name__ == "__main__":
	main(sys.argv)