diff --git a/setup.py b/setup.py index 8660bc0..e38724e 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,8 @@ packages=['tessierplot'], install_requires=[ 'matplotlib', - 'pyperclip' + 'pyperclip', + 'six', + 'pandas' ], zip_safe=False) \ No newline at end of file diff --git a/tessierplot/data.py b/tessierplot/data.py index 4b67411..23f5daf 100644 --- a/tessierplot/data.py +++ b/tessierplot/data.py @@ -3,10 +3,11 @@ import re import numpy as np from six.moves import xrange +import json class parser(object): def __init__(self): - self._header = None + self._header = None self._data = None def parse(self): return self._data @@ -19,7 +20,7 @@ def __init__(self,filename=None,filebuffer=None): self._filebuffer = filebuffer super(dat_parser,self).__init__() - + def parse(self): filebuffer = self._filebuffer if filebuffer == None: @@ -28,8 +29,9 @@ def parse(self): else: f = filebuffer self._filebuffer = filebuffer + self._header,self._headerlength = self.parseheader() - + self._data = pandas.read_csv(f, sep='\t', \ comment='#', @@ -39,8 +41,79 @@ def parse(self): return super(dat_parser,self).parse() + def parse_header(self): + return None + + def is_valid(self): + pass + +class qcodes_parser(dat_parser): + def __init__(self,filename=None,filebuffer=None): + super(qcodes_parser,self).__init__(filename=filename,filebuffer=filebuffer) + + def parse(self): + return super(qcodes_parser,self).parse() + def is_valid(self): pass + + def parseheader(self): + #read in the .json file + json_file = ''.join((os.path.dirname(self._file),'/snapshot.json')) + json_filebuffer = open(json_file) + json_s=json_filebuffer.read() + + #read the column names from the .dat file + filebuffer = self._filebuffer + firstline = filebuffer.readline().decode('utf-8') + secondline = filebuffer.readline().decode('utf-8') + + raw = r'\".*?\"' + reggy = re.compile(raw) + columnname = reggy.findall(secondline) + columnname = [i.replace('\"','') for i in columnname] + + #look for the part where the data file meta info is stored + json_data = json.loads(json_s) + headerdict = json_data['arrays'] + headervalues=[] + units = [] + headerlength=0 + + for i,val in enumerate(headerdict): + if headerdict[val]['is_setpoint']: + line=[i,headerdict[val]['name'],'coordinate'] + line_x = zip(['column','name','type'],line) + headervalues.append(line_x) + units.append(headerdict[val]['unit']) + else: + line=[i,headerdict[val]['name'],'value'] + line_x = zip(['column','name','type'],line) + headervalues.append(line_x) + + headervalues = [dict(x) for x in headervalues] + + # sort according to the column order in the dat file + header=[] + for i, col in enumerate(columnname): + for j, h in enumerate(headervalues): + if col == h['name']: + header.append(h) + break + + #set_trace() + return header,headerlength + +class qtlab_parser(dat_parser): + def __init__(self,filename=None,filebuffer=None): + super(qtlab_parser,self).__init__(filename=filename,filebuffer=filebuffer) + + def parse(self): + return super(qtlab_parser,self).parse() + + def is_valid(self): + pass + def parseheader(self): filebuffer = self._filebuffer firstline = filebuffer.readline().decode() @@ -118,28 +191,82 @@ def parseheader(self): return header,headerlength -class gz_parser(dat_parser): - def __init__(self,filename,filebuffer=None): - self._file = filename +def factory_gz_parser(cls): + # parent class of gz_parser depends on which kind of data file we have + class gz_parser(cls): + def __init__(self,filename,filebuffer=None): + self._file = filename + + import gzip + f = open(self._file,'rb') + if (f.read(2) == b'\x1f\x8b'): + f.seek(0) + gz = super(gz_parser,self).__init__(filename=filename,filebuffer=gzip.GzipFile(fileobj=f)) + return gz + else: + #raise Exception('Not a valid gzip file') + print('Not a valid gzip file') + gz = super(gz_parser,self).__init__(filename=filename,filebuffer=None) + return gz + + return gz_parser + +#class for supported filetypes, handles which parser class to call +class filetype(): + def __init__(self,filepath=None): + self._parser = None + self._filepath = filepath + + #is there a snapshot.json file in the directory? + #if yes, we can assume it's a qcodes measurement file + json_file = ''.join((os.path.dirname(filepath),'/snapshot.json')) + set_file = self.getsetfilepath(filepath) - import gzip - f = open(self._file,'rb') - if (f.read(2) == b'\x1f\x8b'): - f.seek(0) - gz = super(gz_parser,self).__init__(filename=filename,filebuffer=gzip.GzipFile(fileobj=f)) - return gz + if os.path.exists(json_file): + self._datparser = qcodes_parser + elif os.path.exists(set_file): + self._datparser = qtlab_parser else: - raise Exception('Not a valid gzip file') + self._datparser = dat_parser + + self._FILETYPES = { + '.dat': self._datparser, # link the correct parser to .dat files + '.dat.gz': factory_gz_parser(self._datparser) # let the gz parser class have the right parent + } + + def get_parser(self): + ftype = self.get_filetype() + for f in self._FILETYPES.keys(): + if f == ftype: + return self._FILETYPES[f] + else: + raise('No valid filetype') + + return None + + @classmethod + def getsetfilepath(cls,filepath=''): + file_Path, file_Extension = os.path.splitext(filepath) + if file_Extension == '.gz': + file_Path = os.path.splitext(file_Path)[0] + elif file_Extension != '.dat': + print('Wrong file extension') + setfilepath = file_Path + '.set' + if not os.path.exists(setfilepath): + setfilepath = None + return setfilepath + + def get_filetype(self): + for ext in self._FILETYPES.keys(): + if self._filepath.endswith(ext): + return ext + + return None class Data(pandas.DataFrame): - #supported filetypes - _FILETYPES = { - '.dat': dat_parser, - '.dat.gz': gz_parser - } - + def __init__(self,*args,**kwargs): #args: filepath, sort #filepath = kwargs.pop('filepath',None) @@ -154,38 +281,43 @@ def __init__(self,*args,**kwargs): @property def _constructor(self): + return Data @classmethod def determine_filetype(cls,filepath): - for ext in cls._FILETYPES.keys(): - if filepath.endswith(ext): - return ext - return None + ftype = filetype(filepath=filepath) + + return ftype.get_filetype() @classmethod def load_header_only(cls,filepath): parser = cls.determine_parser(filepath) p = parser(filename=filepath,filebuffer=open(filepath,mode='rb')) + if p._filebuffer is None: + p = None + return None header,headerlength = p.parseheader() df = Data() df._header = header + return df @classmethod def determine_parser(cls,filepath): - ftype = cls.determine_filetype(filepath) - - if ftype is not None: - parser = cls._FILETYPES[ftype] - else: - raise Exception('Unknown filetype') + ftype = filetype(filepath=filepath) + parser = ftype.get_parser() + return parser @classmethod def load_file(cls,filepath): parser = cls.determine_parser(filepath) - p = parser(filepath) + p = parser(filename=filepath,filebuffer=open(filepath,mode='rb')) + + if p._filebuffer is None: + p = None + return None,None p.parse() return p._data,p._header @@ -195,15 +327,19 @@ def from_file(cls, filepath): newdataframe = Data(dat) newdataframe._header = header + return newdataframe @property def coordkeys(self): coord_keys = [i['name'] for i in self._header if i['type']=='coordinate' ] + return coord_keys + @property def valuekeys(self): value_keys = [i['name'] for i in self._header if i['type']=='value' ] + return value_keys @property @@ -212,6 +348,7 @@ def sorted_data(self): #sort the data from the last coordinate column backwards self._sorted_data = self.sort_values(by=self.coordkeys) self._sorted_data = self._sorted_data.dropna(how='any') + return self._sorted_data @property @@ -219,7 +356,9 @@ def ndim_sparse(self): #returns the amount of columns with more than one unique value in it dims = np.array(self.dims) nDim = len(dims[dims > 1]) + return nDim + @property def dims(self): #returns an array with the amount of unique values of each coordinate column @@ -231,6 +370,7 @@ def dims(self): for i in cols: col = getattr(self.sorted_data,i['name']) dims = np.hstack( ( dims ,len(col.unique()) ) ) + return dims def make_filter_from_uniques_in_columns(self,columns): diff --git a/tessierplot/plot.py b/tessierplot/plot.py index 1a10a64..a6c33a5 100644 --- a/tessierplot/plot.py +++ b/tessierplot/plot.py @@ -172,6 +172,8 @@ def plot3d(self, massage_func=None, ax_destination=None, n_index=None, style='log', + xlims_manual=None, + ylims_manual=None, clim=None, aspect='auto', interpolation='nearest', @@ -203,7 +205,7 @@ def plot3d(self, massage_func=None, n_subplots = len(n_index) if n_subplots > 1: - width = 2 + width = 1 else: width = 1 n_valueaxes = len(self.data.valuekeys) @@ -213,10 +215,11 @@ def plot3d(self, massage_func=None, else: value_axes = list([value_axis]) - width = len(value_axes) - n_subplots = n_subplots *width - gs = gridspec.GridSpec(int(n_subplots/width)+n_subplots%width, width) - + width = 1#len(value_axes) + height = len(value_axes) + n_subplots = n_subplots *width#int(n_subplots/width)+n_subplots%width + #gs = gridspec.GridSpec(int(n_subplots/width)+n_subplots%width, width) + gs = gridspec.GridSpec(height,width) cnt=0 #subplot counter #enumerate over the generated list of unique values specified in the uniques columns @@ -256,6 +259,9 @@ def plot3d(self, massage_func=None, #or the first since there has been sorting #this doesnt work for e.g. a hilbert measurement + if x.index[0] > x.index[-1]: + sweepoverride = True + print('xu: {:d}, yu: {:d}, lenz: {:d} after trimming'.format(xu,yu,len(z))) #sorting sorts negative to positive, so beware: #sweep direction determines which part of array should be cut off @@ -268,7 +274,7 @@ def plot3d(self, massage_func=None, x = x[:xu*yu] y = y[:xu*yu] - XX = np.reshape(z,(xu,yu)) + XX = z.values.reshape(xu,yu) self.x = x self.y = y @@ -276,10 +282,28 @@ def plot3d(self, massage_func=None, #now set the lims xlims = (x.min(),x.max()) ylims = (y.min(),y.max()) + xnew = xlims + ynew = ylims + + if not (xlims_manual == None): + xnew = xlims + if xlims_manual[0] > xlims[0]: + xnew[0] = xlims_manual[0] + + if xlims_manual[1] > xlims[1]: + xnew[1] = xlims_manual[1] + + if not (ylims_manual == None): + ynew = ylims + if ylims_manual[0] > ylims[0]: + ynew[0] = ylims_manual[0] + + if ylims_manual[1] > ylims[1]: + ynew[1] = ylims_manual[1] #determine stepsize for di/dv, inprincipe only y step is used (ie. the diff is also taken in this direction and the measurement swept..) - xstep = float(xlims[1] - xlims[0])/xu - ystep = float(ylims[1] - ylims[0])/yu + xstep = float(xlims[1] - xlims[0])/(xu-1) + ystep = float(ylims[1] - ylims[0])/(yu-1) ext = xlims+ylims self.extent = ext @@ -314,7 +338,7 @@ def plot3d(self, massage_func=None, measAxisDesignation = parseUnitAndNameFromColumnName(value_keys[value_axis]) #wrap all needed arguments in a datastructure if measAxisDesignation: - if measAxisDesignation[1]: + if measAxisDesignation[1] and not isinstance(measAxisDesignation,str): cbar_quantity = measAxisDesignation[0] cbar_unit = measAxisDesignation[1] else: @@ -416,7 +440,7 @@ def plot2d(self,fiddle=False, value_axis = -1, style=['normal'], uniques_col_str=[], - legend=True, + legend=False, ax_destination=None, subplots_args={'top':0.96, 'bottom':0.17, 'left':0.14, 'right':0.85,'hspace':0.0}, massage_func=None, @@ -425,7 +449,7 @@ def plot2d(self,fiddle=False, if not self.fig and not ax_destination: self.fig = plt.figure() self.fig.subplots_adjust(**subplots_args) - + #determine how many subplots we need n_subplots = 1 coord_keys = self.data.coordkeys @@ -461,7 +485,7 @@ def plot2d(self,fiddle=False, width = len(value_axes) n_subplots = n_subplots * width - gs = gridspec.GridSpec(int(n_subplots/width)+n_subplots%width, width) + gs = gridspec.GridSpec(width,int(n_subplots/width)+n_subplots%width) uniques_axis_designations = [] #do some filtering of the colstr to get separate name and unit of said name @@ -471,7 +495,7 @@ def plot2d(self,fiddle=False, if n_index is not None: n_index = np.array(n_index) n_subplots = len(n_index) - + ax = None for i,j in enumerate(self.data.make_filter_from_uniques_in_columns(uniques_col_str)): @@ -483,39 +507,42 @@ def plot2d(self,fiddle=False, #get the columns /not/ corresponding to uniques_cols #find the coord_keys in the header coord_keys = self.data.coordkeys - + #filter out the keys corresponding to unique value columns us=uniques_col_str coord_keys = [key for key in coord_keys if key not in uniques_col_str] #now find out if there are multiple value axes value_keys = self.data.valuekeys - + x=data.loc[:,coord_keys[-1]] y=data.loc[:,value_keys[value_axis]] - + title ='' for i,z in enumerate(uniques_col_str): - title = '\n'.join([title, '{:s}: {:g}'.format(uniques_axis_designations[i],data[z].iloc[0])]) + pass + # this crashes sometimes. did not investiagte yet what the problem is. switched off in the meantime + #title = '\n'.join([title, '{:s}: {:g}'.format(uniques_axis_designations[i],data[z].iloc[0])]) wrap = styles.getPopulatedWrap(style) wrap['XX'] = y wrap['X'] = x wrap['massage_func'] = massage_func styles.processStyle(style,wrap) + if ax_destination: ax = ax_destination else: ax = plt.subplot(gs[k]) ax.plot(wrap['X'],wrap['XX'],label=title,**kwargs) - + if legend: plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.) - ax = self.fig.axes[0] + #ax = self.fig.axes[0] xaxislabel = parseUnitAndNameFromColumnName(coord_keys[-1]) yaxislabel = parseUnitAndNameFromColumnName(value_keys[value_axis]) - + if xaxislabel: if not isinstance(xaxislabel, np.ndarray): xaxisquantity = xaxislabel diff --git a/tessierplot/view.py b/tessierplot/view.py index 1e4d9aa..782cc7e 100644 --- a/tessierplot/view.py +++ b/tessierplot/view.py @@ -7,27 +7,36 @@ from itertools import chain import numpy as np import re -import win32api from IPython.display import VimeoVideo from IPython.display import display, HTML, display_html +if os.name == 'nt': + import win32api + +import imp +imp.reload(ts) + plotstyle = 'normal' #pylab.rcParams['legend.linewidth'] = 10 def getthumbcachepath(file): oneupdir = os.path.abspath(os.path.join(os.path.dirname(file),os.pardir)) datedir = os.path.split(oneupdir)[1] #directory name should be datedir, if not - if re.match('[0-9]{8}',datedir): - preid= datedir - else: - preid = '' - + #relative to project/working directory - cachepath = os.path.normpath(os.path.join(os.getcwd(),'thumbnails', preid + '_'+os.path.splitext(os.path.split(file)[1])[0] + '_thumb.png')) + cachepath = os.path.normpath(os.path.join(os.getcwd(),'thumbnails', datedir + '_'+os.path.split(os.path.dirname(file))[1] + '_thumb.png')) + return cachepath def getthumbdatapath(file): - thumbdatapath = os.path.splitext(file)[0] + '_thumb.png' + f,ext = os.path.splitext(file) + + # make sure we have stripped of all wrapping file extesions due to e.g. zipping + while not ext=='': + f,ext = os.path.splitext(f) + + thumbdatapath = f + '_thumb.png' + return thumbdatapath class tessierView(object): @@ -52,19 +61,7 @@ def __init__(self, rootdir='./', filemask='.*\.dat(?:\.gz)?$',filterstring='',ov def on(self): print('You are now watching through the glasses of ideology') display(VimeoVideo('106036638')) - - def getsetfilepath(self,filename): - file_Path, file_Extension = os.path.splitext(filename) - if file_Extension == '.gz': - file_Path = os.path.splitext(file_Path)[0] - elif file_Extension != '.dat': - print('Wrong file extension') - setfilepath = file_Path + '.set' - - if not os.path.exists(setfilepath): - setfilepath = None - - return setfilepath + def makethumbnail(self, filename,override=False,style=[]): #create a thumbnail and store it in the same directory and in the thumbnails dir for local file serving, override options for if file already exists thumbfile = getthumbcachepath(filename) @@ -105,7 +102,7 @@ def makethumbnail(self, filename,override=False,style=[]): thumbfile = None #if fail no thumbfile was created print('Error {:s} for file {:s}'.format(str(e),filename)) pass - + return thumbfile @@ -122,7 +119,7 @@ def walklevel(self,some_dir, level=1): def walk(self, filemask, filterstring, headercheck=None,**kwargs): paths = (self._root,) images = 0 - self.allthumbs = [] + self._allthumbs = [] reg = re.compile(self._filemask) #get only files determined by filemask for root,dirnames,filenames in chain.from_iterable(os.walk(path) for path in paths): @@ -140,37 +137,38 @@ def walk(self, filemask, filterstring, headercheck=None,**kwargs): # check if filterstring can be found in the path isinfilterstring = filterstring.lower() in fullpath.lower() - - fullname,fullext = os.path.split(fullpath) - fullname = win32api.GetShortPathName(fullname) - fullpath = fullname + '/' + fullext dir,basename = os.path.split(fullpath) + + #extract the directory which is the date of measurement + datedir = os.path.basename(os.path.normpath(dir+'/../')) + + if os.name == 'nt': # avoid problems with very long path names in windows + dir = win32api.GetShortPathName(dir) + fullpath = dir + '/' + basename measname,ext1 = os.path.splitext(basename) #dirty, if filename ends e.g. in gz, also chops off the second extension measname,ext2 = os.path.splitext(measname) ext = ext2+ext1 - - #extract the directory which is the date of measurement - datedir = os.path.basename(os.path.normpath(dir+'/../')) #check if filterstring can be found in the set file (e.g. 'dac4: 1337.0') if not isinfilterstring: - setfilepath = self.getsetfilepath(fullpath) + setfilepath = data.filetype.getsetfilepath(fullpath) if setfilepath: # only check for filterstring if set file exists isinfilterstring = filterstring in open(setfilepath).read() - if isinfilterstring: #liable for improvement + if isinfilterstring: #liable for improvement if self._showfilenames: print(fullpath) df = data.Data.load_header_only(fullpath) if headercheck is None or df.coordkeys[-2] == headercheck: thumbpath = self.makethumbnail(fullpath,**kwargs) - + if thumbpath: + thumbpath_html = thumbpath.replace('#','%23') # html doesn not like number signs in file paths self._allthumbs.append({'datapath':fullpath, - 'thumbpath':thumbpath, + 'thumbpath':thumbpath_html, 'datedir':datedir, 'measname':measname}) images += 1