4 changes: 3 additions & 1 deletion setup.py
@@ -10,6 +10,8 @@
    packages=['tessierplot'],
    install_requires=[
        'matplotlib',
-        'pyperclip'
+        'pyperclip',
+        'six',
+        'pandas'
        ],
    zip_safe=False)
200 changes: 170 additions & 30 deletions tessierplot/data.py
@@ -3,10 +3,11 @@
import re
import numpy as np
from six.moves import xrange
import json

class parser(object):
    def __init__(self):
        self._header = None
        self._data = None
    def parse(self):
        return self._data
@@ -19,7 +20,7 @@ def __init__(self,filename=None,filebuffer=None):
        self._filebuffer = filebuffer

        super(dat_parser,self).__init__()

    def parse(self):
        filebuffer = self._filebuffer
        if filebuffer is None:
@@ -28,8 +29,9 @@ def parse(self):
        else:
            f = filebuffer
            self._filebuffer = filebuffer

        self._header,self._headerlength = self.parseheader()

        self._data = pandas.read_csv(f,
                                     sep='\t',
                                     comment='#',
@@ -39,8 +41,79 @@

        return super(dat_parser,self).parse()
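
For reference, a sketch (not part of this diff) of the flat-file layout this parse() handles, and the equivalent standalone pandas call; the file name, column names, and values are made up:

# hypothetical example.dat contents:
#     # "gate_voltage"	"current"
#     0.000	1.2e-9
#     0.001	1.4e-9
import pandas
df = pandas.read_csv('example.dat', sep='\t', comment='#',
                     names=['gate_voltage', 'current'])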

    def parseheader(self):
        # base-class stub; parse() calls self.parseheader(), so the name must match
        return None

    def is_valid(self):
        pass

class qcodes_parser(dat_parser):
    def __init__(self,filename=None,filebuffer=None):
        super(qcodes_parser,self).__init__(filename=filename,filebuffer=filebuffer)

    def parse(self):
        return super(qcodes_parser,self).parse()

    def is_valid(self):
        pass

    def parseheader(self):
        #read in the .json file
        json_file = ''.join((os.path.dirname(self._file),'/snapshot.json'))
        json_filebuffer = open(json_file)
        json_s = json_filebuffer.read()

        #read the column names from the .dat file
        filebuffer = self._filebuffer
        firstline = filebuffer.readline().decode('utf-8')
        secondline = filebuffer.readline().decode('utf-8')

        raw = r'\".*?\"'
        reggy = re.compile(raw)
        columnname = reggy.findall(secondline)
        columnname = [i.replace('\"','') for i in columnname]

        #look for the part where the data file meta info is stored
        json_data = json.loads(json_s)
        headerdict = json_data['arrays']
        headervalues = []
        units = []
        headerlength = 0

        for i,val in enumerate(headerdict):
            if headerdict[val]['is_setpoint']:
                line = [i,headerdict[val]['name'],'coordinate']
                line_x = zip(['column','name','type'],line)
                headervalues.append(line_x)
                units.append(headerdict[val]['unit'])
            else:
                line = [i,headerdict[val]['name'],'value']
                line_x = zip(['column','name','type'],line)
                headervalues.append(line_x)

        headervalues = [dict(x) for x in headervalues]

        # sort according to the column order in the dat file
        header = []
        for i, col in enumerate(columnname):
            for j, h in enumerate(headervalues):
                if col == h['name']:
                    header.append(h)
                    break

        return header,headerlength
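
A sketch (not part of this diff) of the snapshot.json fragment this parseheader() consumes and the header entries it derives; the array names and units are made up:

import json
snapshot = json.loads('''{"arrays": {
    "gate_set": {"name": "gate_voltage", "is_setpoint": true, "unit": "V"},
    "current":  {"name": "current", "is_setpoint": false, "unit": "A"}}}''')
header = []
for i, key in enumerate(snapshot['arrays']):
    entry = snapshot['arrays'][key]
    kind = 'coordinate' if entry['is_setpoint'] else 'value'
    header.append({'column': i, 'name': entry['name'], 'type': kind})
# header == [{'column': 0, 'name': 'gate_voltage', 'type': 'coordinate'},
#            {'column': 1, 'name': 'current', 'type': 'value'}]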

class qtlab_parser(dat_parser):
    def __init__(self,filename=None,filebuffer=None):
        super(qtlab_parser,self).__init__(filename=filename,filebuffer=filebuffer)

    def parse(self):
        return super(qtlab_parser,self).parse()

    def is_valid(self):
        pass

    def parseheader(self):
        filebuffer = self._filebuffer
        firstline = filebuffer.readline().decode()
@@ -118,28 +191,82 @@ def parseheader(self):

        return header,headerlength

-class gz_parser(dat_parser):
-    def __init__(self,filename,filebuffer=None):
-        self._file = filename
-
-        import gzip
-        f = open(self._file,'rb')
-        if (f.read(2) == b'\x1f\x8b'):
-            f.seek(0)
-            gz = super(gz_parser,self).__init__(filename=filename,filebuffer=gzip.GzipFile(fileobj=f))
-            return gz
-        else:
-            raise Exception('Not a valid gzip file')

def factory_gz_parser(cls):
    # parent class of gz_parser depends on which kind of data file we have
    class gz_parser(cls):
        def __init__(self,filename,filebuffer=None):
            self._file = filename

            import gzip
            f = open(self._file,'rb')
            if (f.read(2) == b'\x1f\x8b'):
                f.seek(0)
                gz = super(gz_parser,self).__init__(filename=filename,filebuffer=gzip.GzipFile(fileobj=f))
                return gz
            else:
                #raise Exception('Not a valid gzip file')
                print('Not a valid gzip file')
                gz = super(gz_parser,self).__init__(filename=filename,filebuffer=None)
                return gz

    return gz_parser
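
A usage sketch (not part of this diff): the factory returns a gz_parser class whose parent is whatever parser class is passed in, so a gzipped qcodes file is read by a gz parser that ultimately parses like qcodes_parser; the file name is made up:

GzQcodesParser = factory_gz_parser(qcodes_parser)  # gz_parser subclassing qcodes_parser
p = GzQcodesParser('measurement.dat.gz')           # checks the gzip magic bytes, wraps in GzipFile
data = p.parse()                                   # parsed via the qcodes_parser machinery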

#class for supported filetypes, handles which parser class to call
class filetype():
    def __init__(self,filepath=None):
        self._parser = None
        self._filepath = filepath

        #is there a snapshot.json file in the directory?
        #if yes, we can assume it's a qcodes measurement file
        json_file = ''.join((os.path.dirname(filepath),'/snapshot.json'))
        set_file = self.getsetfilepath(filepath)

        if os.path.exists(json_file):
            self._datparser = qcodes_parser
        elif set_file is not None: # getsetfilepath() already checked existence; os.path.exists(None) raises on Python 3
            self._datparser = qtlab_parser
        else:
            self._datparser = dat_parser

        self._FILETYPES = {
            '.dat': self._datparser, # link the correct parser to .dat files
            '.dat.gz': factory_gz_parser(self._datparser) # let the gz parser class have the right parent
        }

    def get_parser(self):
        ftype = self.get_filetype()
        for f in self._FILETYPES.keys():
            if f == ftype:
                return self._FILETYPES[f]

        # only raise once every known extension has been checked;
        # raising a bare string is a TypeError
        raise Exception('No valid filetype')

    @classmethod
    def getsetfilepath(cls,filepath=''):
        file_Path, file_Extension = os.path.splitext(filepath)
        if file_Extension == '.gz':
            file_Path = os.path.splitext(file_Path)[0]
        elif file_Extension != '.dat':
            print('Wrong file extension')
        setfilepath = file_Path + '.set'

        if not os.path.exists(setfilepath):
            setfilepath = None

        return setfilepath

    def get_filetype(self):
        for ext in self._FILETYPES.keys():
            if self._filepath.endswith(ext):
                return ext

        return None
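
A dispatch sketch (not part of this diff), assuming a qcodes-style directory where snapshot.json sits next to the data file; the path is made up:

ft = filetype(filepath='/data/run42/measurement.dat')
ft.get_filetype()             # -> '.dat'
parser_cls = ft.get_parser()  # -> qcodes_parser (snapshot.json found);
                              #    qtlab_parser if a .set file exists, dat_parser otherwise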

class Data(pandas.DataFrame):
-    #supported filetypes
-    _FILETYPES = {
-        '.dat': dat_parser,
-        '.dat.gz': gz_parser
-    }

    def __init__(self,*args,**kwargs):
        #args: filepath, sort
        #filepath = kwargs.pop('filepath',None)
@@ -154,38 +281,43 @@ def __init__(self,*args,**kwargs):

    @property
    def _constructor(self):
        return Data

    @classmethod
    def determine_filetype(cls,filepath):
-        for ext in cls._FILETYPES.keys():
-            if filepath.endswith(ext):
-                return ext
-        return None
+        ftype = filetype(filepath=filepath)
+
+        return ftype.get_filetype()

    @classmethod
    def load_header_only(cls,filepath):
        parser = cls.determine_parser(filepath)
        p = parser(filename=filepath,filebuffer=open(filepath,mode='rb'))
        if p._filebuffer is None:
            p = None
            return None
        header,headerlength = p.parseheader()
        df = Data()
        df._header = header

        return df

    @classmethod
    def determine_parser(cls,filepath):
-        ftype = cls.determine_filetype(filepath)
-
-        if ftype is not None:
-            parser = cls._FILETYPES[ftype]
-        else:
-            raise Exception('Unknown filetype')
+        ftype = filetype(filepath=filepath)
+        parser = ftype.get_parser()

        return parser

    @classmethod
    def load_file(cls,filepath):
        parser = cls.determine_parser(filepath)
-        p = parser(filepath)
+        p = parser(filename=filepath,filebuffer=open(filepath,mode='rb'))

        if p._filebuffer is None:
            p = None
            return None,None
        p.parse()
        return p._data,p._header

@@ -195,15 +327,19 @@ def from_file(cls, filepath):

        newdataframe = Data(dat)
        newdataframe._header = header

        return newdataframe
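
A usage sketch (not part of this diff) of the two entry points; the file name is made up:

dat, header = Data.load_file('measurement.dat')  # plain DataFrame plus parsed header, or (None,None)
df = Data.from_file('measurement.dat')           # the same data wrapped in a Data frame,
                                                 # with the header attached for the properties below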

    @property
    def coordkeys(self):
        coord_keys = [i['name'] for i in self._header if i['type']=='coordinate']

        return coord_keys

    @property
    def valuekeys(self):
        value_keys = [i['name'] for i in self._header if i['type']=='value']

        return value_keys
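
Given a header like the one qcodes_parser.parseheader() builds above, these two properties split the columns by role; a sketch (not part of this diff):

df = Data()
df._header = [{'column': 0, 'name': 'gate_voltage', 'type': 'coordinate'},
              {'column': 1, 'name': 'current',      'type': 'value'}]
df.coordkeys  # -> ['gate_voltage']
df.valuekeys  # -> ['current']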

    @property
@@ -212,14 +348,17 @@ def sorted_data(self):
        #sort the data from the last coordinate column backwards
        self._sorted_data = self.sort_values(by=self.coordkeys)
        self._sorted_data = self._sorted_data.dropna(how='any')

        return self._sorted_data

    @property
    def ndim_sparse(self):
        #returns the number of columns with more than one unique value in them
        dims = np.array(self.dims)
        nDim = len(dims[dims > 1])

        return nDim

    @property
    def dims(self):
        #returns an array with the number of unique values in each coordinate column
@@ -231,6 +370,7 @@ def dims(self):
        for i in cols:
            col = getattr(self.sorted_data,i['name'])
            dims = np.hstack((dims, len(col.unique())))

        return dims
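
A sketch (not part of this diff) of what dims and ndim_sparse would report for a small made-up 3x2 sweep, assuming Data accepts a plain dict like its DataFrame parent:

df = Data({'gate_voltage': [0., 0., 1., 1., 2., 2.],
           'bias':         [0., 1., 0., 1., 0., 1.]})
df._header = [{'column': 0, 'name': 'gate_voltage', 'type': 'coordinate'},
              {'column': 1, 'name': 'bias',         'type': 'coordinate'}]
df.dims         # expected array([3, 2]): unique values per coordinate column
df.ndim_sparse  # expected 2: both columns vary; a constant column would not count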

    def make_filter_from_uniques_in_columns(self,columns):