from scipy import c_, arange
from scipy.io import read_array, write_array
import pickle, csv

class dbase:
	"""
	A simple data-frame that reads and writes csv/pickle files with variable names.

	Columns in the data can be accessed using x.get('a','c') where 'a' and 'c' are
	variable names.

	Attributes set by load():
		varnm -- dictionary mapping each variable name to its column index
		data  -- the data array; get() indexes it as data[:, column]
	"""

	def __init__(self,f):
		"""
		Initializing the dbase class. Loading file f.
		"""
		self.load(f)

	def load(self,fname):
		"""
		Loading data from a csv file or a pickle of the dbase class.

		The file type is chosen from the extension of fname ('csv' or 'pickle').
		Raises ValueError for any other extension; the check happens before the
		file is opened.
		"""
		fext = self.__ext(fname)
		if fext == 'csv':
			mode = 'r'
		elif fext == 'pickle':
			mode = 'rb'							# pickles are binary data
		else:
			raise ValueError('This class only works on csv and pickle files')

		f = open(fname, mode)
		try:
			if fext == 'csv':
				# vardic consumes the header line, so only the data rows
				# are left in f when it is handed to read_array
				self.varnm = self.vardic(f)
				self.data = read_array(f, separator=',', lines=(0,-1))
			else:
				# NOTE: unpickling can execute arbitrary code; only load
				# pickle files that come from a trusted source
				a = pickle.load(f)
				self.varnm = a.varnm
				self.data = a.data
		finally:
			f.close()							# closed even if reading fails

	def dump(self,fname):
		"""
		Dumping the instance of the class into csv or pickle file.

		The file type is chosen from the extension of fname ('csv' or 'pickle').
		Raises ValueError for any other extension; the check happens before the
		file is opened, so an unsupported name never creates or truncates a file.
		"""
		fext = self.__ext(fname)
		if fext == 'csv':
			mode = 'w'
		elif fext == 'pickle':
			mode = 'wb'							# pickles are binary data
		else:
			raise ValueError('This class only outputs csv and pickle files')

		f = open(fname, mode)
		try:
			if fext == 'csv':
				writer = csv.writer(f)
				# dictionary order is unrelated to column position, so the
				# names are sorted by their stored index to line up with the data
				writer.writerow(sorted(self.varnm, key=self.varnm.get))
				writer.writerows(self.data)
			else:
				pickle.dump(self,f)
		finally:
			f.close()							# closed even if writing fails

	def __ext(self,fname):
		"""
		Finding the file extension of the filename passed to dbase
		(the text after the last '.', with surrounding white space removed)
		"""
		return fname.split('.')[-1].strip()

	def vardic(self,f):
		"""
		Making a dictionary with variable names and indices.

		Reads exactly one line from the open file f, splits it on ',' and maps
		each name (stripped of leading and trailing white space) to its position
		in that line, which is also its column index in the data.
		"""
		dic = {}
		for j, name in enumerate(f.readline().split(',')):
			dic[name.strip()] = j

		return dic

	def get(self,*var):
		"""
		Selecting columns based on variable labels. Assumes data are in columns.

		With one label the single column self.data[:, index] is returned; with
		several labels the columns are joined side by side in the order given.
		Raises KeyError for an unknown label and IndexError when called with no
		labels.
		"""
		a = self.data[:,self.varnm[var[0]]]		# the column of the 1st label

		for i in var[1:]:
			a = c_[a,self.data[:,self.varnm[i]]]	# concatenate column-wise, along last axis

		return a

if __name__ == '__main__':
	# Demo: write a small csv file, load it into a dbase instance, round-trip
	# the instance through a pickle file and write it back out as csv.

	# creating simulated data and variable labels
	varnm = ['a','b','c']						# variable labels
	data = arange(15).reshape(5,3)				# the data array
	f = open('data.csv','w')
	try:
		writer = csv.writer(f)
		writer.writerow(varnm)
		writer.writerows(data)
	finally:
		f.close()								# closed even if writing fails

	# loading the data from the csv file and dumping the dbase class instance to a pickle file
	a = dbase("data.csv")
	a.dump("data.pickle")

	# loading the object from the pickle file
	# (each print takes one parenthesised string, so the statement form and
	# the function form of print produce the same output)
	print("\nLoading the dbase object from a pickle file\n")

	b = dbase("data.pickle")

	print("Data from dbase class\n" + str(b.data))
	print("\nVariable names from dbase class\n" + str(b.varnm))
	print("\nTwo columns selected using variable names\n" + str(b.get('a','c')))

	# dump() returns nothing, so it is called on its own line instead of
	# being printed (which showed a stray "None")
	print("\nSaving data and variable names to a different csv file\n")
	b.dump("data_dump.csv")
