Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 26 additions & 19 deletions moldscript/fukui.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@

import sys, os
import time
import datetime
import cclib as cc
from collections import defaultdict
from moldscript.argument_parser import load_variables

from moldscript.utils import add_cpu_times

class fukui:
"""
Expand All @@ -33,6 +34,10 @@ def __init__(self, data, create_dat=True, **kwargs):

if create_dat:
elapsed_time = round(time.time() - start_time_overall, 2)
try:
total_cpu = add_cpu_times(self.file_data)
self.args.log.write(f"\n Fukui calculations complete in {total_cpu} seconds")
except: pass
self.args.log.write(
f"-- Fukui Parameter Collection complete in {elapsed_time} seconds\n"
)
Expand All @@ -43,37 +48,31 @@ def get_data(self):
file_data = mydict()
first = False

self.args.log.write(
f"-- Fukui Parameter Collection starting"
)

for file_name in self.data.keys():
for i, file_name in enumerate(self.data.keys()):
neutral_data, oxidized_data, reduced_data = None, None, None
if "neutral" in self.data[file_name].keys():
neutral_data = self.parse_cc_data(
file_name, self.data[file_name]["neutral"]
)
if first == False and self.args.program=='gaussian':
self.args.log.write(f" Package used: {neutral_data.metadata['package']} {neutral_data.metadata['package_version']}")
self.args.log.write(f" Functional used: {neutral_data.metadata['functional']}")
self.args.log.write(f" Basis set used: {neutral_data.metadata['basis_set']}\n")
first = True

if "oxidized" in self.data[file_name].keys():
oxidized_data = self.parse_cc_data(
file_name, self.data[file_name]["oxidized"]
)
if first == False and self.args.program=='gaussian':
self.args.log.write(f" Functional used: {oxidized_data.metadata['functional']}")
self.args.log.write(f" Basis set used: {oxidized_data.metadata['basis_set']}")
first = True

if "reduced" in self.data[file_name].keys():
reduced_data = self.parse_cc_data(
file_name, self.data[file_name]["reduced"]
)
if first == False and self.args.program=='gaussian':
self.args.log.write(f" Functional used: {reduced_data.metadata['functional']}")
self.args.log.write(f" Basis set used: {reduced_data.metadata['basis_set']}")
first = True

if i==0:
rel_dir = self.data[file_name]["neutral"].split(os.getcwd()+'/')[1].split(file_name)[0]
self.args.log.write(
f"-- Fukui Parameter Collection from {rel_dir}"
)
self.args.log.write(f" Package used: {neutral_data.metadata['package']} {neutral_data.metadata['package_version']}")
self.args.log.write(f" Functional used: {neutral_data.metadata['functional']}")
self.args.log.write(f" Basis set used: {neutral_data.metadata['basis_set']}\n")

if neutral_data != None and oxidized_data != None and reduced_data != None:
self.args.log.write(
Expand Down Expand Up @@ -114,6 +113,14 @@ def get_data(self):
f"x Skipping file {file_name} as one either neutral, oxidized or reduced does not exist!"
)

file_data[file_name]['cpu_time'] = datetime.timedelta(0) # initialize cpu time
for time in neutral_data.metadata['cpu_time']:
file_data[file_name]['cpu_time'] += time # add cpu time from IE
for time in oxidized_data.metadata['cpu_time']:
file_data[file_name]['cpu_time'] += time # add cpu time from EA
for time in reduced_data.metadata['cpu_time']:
file_data[file_name]['cpu_time'] += time # add cpu time from EA

return file_data

def parse_cc_data(self, file_name, file):
Expand Down
78 changes: 54 additions & 24 deletions moldscript/get_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,16 @@ def get_bond_df(self):
for filename in self.substructure.keys():
dict = self.substructure[filename]
struc = list(dict.keys())[0]
filename = self.file_base(filename)
#filename = self.file_base(filename)
temp_df = bond_df.loc[bond_df['species'] == filename]
try:
indexes = list(dict[struc]['index'][0])
except:
print(f'{filename}: substructure not found, omitting from final bond df\n')
continue
filtered_df = temp_df.loc[temp_df['atom1'].isin(indexes) | temp_df['atom2'].isin(indexes)]
#print(filename, indexes)
filtered_df = bond_df.loc[bond_df['atom1'].isin(indexes) & bond_df['atom2'].isin(indexes) & bond_df['species'].isin([filename])]
#print(filtered_df)
final_df = pd.concat([final_df, filtered_df])

final_df.to_csv(bond_csv, index=False)
Expand All @@ -116,49 +118,56 @@ def get_atom_df(self):
for category in atom_list:
if category in calced_list:
dict = self.dd[category].file_data

if category == 'nbo':
print(' - NPA charges (au)')
print(' - Wiberg bond orders/atom')
dict_df = {k: [] for k in ['species', 'atom_index', 'atom_type', 'npa_charge', 'wiberg_total']}

for filename in dict.keys():

charges = list(dict[filename]['charges']['npa'])
bond_orders = list(dict[filename]['bond_orders'])
#print(len(charges))
atoms = list(dict[filename]['atomnos'])
for charge, bo, num in zip(charges, bond_orders, atoms):

for i, (charge, bo, num) in enumerate(zip(charges, bond_orders, atoms)):
dict_df['wiberg_total'].append(bo)
dict_df['npa_charge'].append(charge)
dict_df['species'].append(filename)
dict_df['atom_index'].append(charges.index(charge))
dict_df['atom_index'].append(i)
element = periodictable.elements[num]
dict_df['atom_type'].append(element.symbol)

elif category == 'nmr':
print(' - Chemical Shifts')
dict_df = {k: [] for k in ['species', 'atom_index', 'atom_type', 'nmr_shielding']}

for filename in dict.keys():

shields = list(dict[filename]['nmr_shielding'])
atoms = list(dict[filename]['atomnos'])
for shield, num in zip(shields, atoms):

for i, (shield, num) in enumerate(zip(shields, atoms)):
dict_df['nmr_shielding'].append(shield)
dict_df['species'].append(filename)
dict_df['atom_index'].append(shields.index(shield))
dict_df['atom_index'].append(i)
element = periodictable.elements[num]
dict_df['atom_type'].append(element.symbol)

elif category == 'fukui':
print(' - CM5 charges (au)')
print(' - Hirshfeld charges (au)')
print(' - Electrophilic, nucleophilic and radical Fukui functions (au)')

if self.program=='gaussian':
dict_df = {k: [] for k in ['species', 'atom_index', 'atom_type', 'cm5_charge', 'hirshfeld_charge', 'ox_npa_charge', 'ox_cm5_charge', 'ox_hirshfeld_charge', 'red_npa_charge', 'red_cm5_charge', 'red_hirshfeld_charge', 'fukui_plus', 'fukui_minus', 'fukui_rad']}
if self.program=='orca':
dict_df = {k: [] for k in ['species', 'atom_index', 'atom_type', 'hirshfeld_charge', 'ox_hirshfeld_charge', 'red_hirshfeld_charge', 'fukui_plus', 'fukui_minus', 'fukui_rad']}
for filename in dict.keys():


for filename in dict.keys():
neut_hirsfeld = list(dict[filename]["neutral"]["atomcharges"]["hirsfeld"])
atoms = list(dict[filename]['atomnos'])
for atom, num in zip(neut_hirsfeld, atoms):
for i, (atom, num) in enumerate(zip(neut_hirsfeld, atoms)):
dict_df['species'].append(filename)
dict_df['atom_index'].append(neut_hirsfeld.index(atom))
dict_df['atom_index'].append(i)
element = periodictable.elements[num]
dict_df['atom_type'].append(element.symbol)
if self.program =='gaussian':
Expand Down Expand Up @@ -201,20 +210,23 @@ def get_atom_df(self):
atom_df = dict_df
else:
atom_df = atom_df.merge(dict_df,how='left', on=['species','atom_index', 'atom_type'])

if self.substructure != '':
print(' ! Filtered by user-defined substructure')
final_df = pd.DataFrame()
for filename in self.substructure.keys():
dict = self.substructure[filename]
struc = list(dict.keys())[0]

basename = self.file_base(filename)
temp_df = atom_df.loc[atom_df['species'] == basename]
try:
indexes = list(dict[struc]['index'][0])
except:
print(f'{basename}: substructure not found, omitting from final df\n')
continue
filtered_df = temp_df.loc[temp_df['atom_index'].isin(indexes)]

filtered_df = atom_df.loc[atom_df['atom_index'].isin(indexes) & atom_df['species'].isin([filename])]
final_df = pd.concat([final_df, filtered_df])

final_df.to_csv(atom_csv, index=False)
Expand Down Expand Up @@ -264,20 +276,28 @@ def get_mol_df(self):
dict_df['LUMO'].append(final_dict['LUMO'])
dict_df['HOMO-LUMO_gap'].append(final_dict['HOMO-LUMO_gap'])
mol_df = pd.DataFrame(dict_df)

if 'sp_ieea' in calced_list:
print(' - Electronegativity (eV)')
print(' - Hardness (eV)')
print(' - Global Electrophilicity Index: GEI (eV)')
print(' - Vertical ionization Potential (eV)')
print(' - Vertical electron Affinity (eV)')
sp_ieea_dict = self.dd['sp_ieea'].file_data
dict_df = {k: [] for k in ['species', 'SP_ox_energy','SP_red_energy', 'chemical_hardness', 'global_electrophilicity', 'electronegativity']}
dict_df = {k: [] for k in ['species', 'IE_vertical','EA_vertical', 'chemical_hardness', 'global_electrophilicity', 'electronegativity']}
for file_name in sp_ieea_dict.keys():
final_dict = sp_ieea_dict[file_name]
neut_row = mol_df.loc[mol_df['species'] == file_name]
neut_e = list(neut_row['energy'])[0]
oxidized_e = final_dict['ox']['E']
reduced_e = final_dict['red']['E']
oe = oxidized_e - neut_e
re = reduced_e - neut_e

oe = (oxidized_e - neut_e)
re = (reduced_e - neut_e)

dict_df['species'].append(file_name)
dict_df['SP_ox_energy'].append(oe)
dict_df['SP_red_energy'].append(re)
dict_df['IE_vertical'].append(oe)
dict_df['EA_vertical'].append(re)
cp = -1*(oe+re)/2
hardness = (oe-re)/2
electrophilicity = cp**2 / (2*hardness)
Expand All @@ -287,22 +307,32 @@ def get_mol_df(self):
dict_df['electronegativity'].append(electronegativity)
dict_df = pd.DataFrame(dict_df)
mol_df = mol_df.merge(dict_df,how='left', on='species')

if 'ad_ieea' in calced_list:
print(' - Electronegativity (eV)')
print(' - Hardness (eV)')
print(' - Global Electrophilicity Index: GEI (eV)')
print(' - Adiabatic ionization Potential (eV)')
print(' - Adiabatic electron Affinity (eV)')

ad_ieea_dict = self.dd['ad_ieea'].file_data
dict_df = {k: [] for k in ['species', 'AD_ox_energy','AD_red_energy']}
dict_df = {k: [] for k in ['species', 'IE_adiabatic','EA_adiabatic']}
for file_name in ad_ieea_dict.keys():
final_dict = ad_ieea_dict[file_name]
neut_row = mol_df.loc[mol_df['species'] == file_name]
neut_e = list(neut_row['energy'])[0]
oxidized_e = final_dict['ox']['E']
reduced_e = final_dict['red']['E']
oe = oxidized_e - neut_e
re = reduced_e - neut_e

oe = (oxidized_e - neut_e)
re = (reduced_e - neut_e)

dict_df['species'].append(file_name)
dict_df['AD_ox_energy'].append(oe)
dict_df['AD_red_energy'].append(re)
dict_df['IE_adiabatic'].append(oe)
dict_df['EA_adiabatic'].append(re)
dict_df = pd.DataFrame(dict_df)
mol_df = mol_df.merge(dict_df,how='left', on='species')

if 'spc' in calced_list:
spc_dict = self.dd['spc'].file_data

Expand Down
45 changes: 28 additions & 17 deletions moldscript/ie_ea.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@

import sys, os
import time
import datetime
import cclib as cc
from collections import defaultdict
from moldscript.argument_parser import load_variables
from moldscript.utils import eV_to_hartree
from moldscript.utils import eV_to_hartree, add_cpu_times

class ie_ea:
"""
Expand All @@ -33,46 +34,56 @@ def __init__(self, data, create_dat=True, **kwargs):

if create_dat:
elapsed_time = round(time.time() - start_time_overall, 2)
try:
total_cpu = add_cpu_times(self.file_data)
self.args.log.write(f"\n IE & EA calculations complete in {total_cpu} seconds")
except: pass

self.args.log.write(
f" --- IE & EA Parameter Collection complete in {elapsed_time} seconds\n"
f"-- IE & EA Parameter Collection complete in {elapsed_time} seconds\n"
)
self.args.log.finalize()

def get_data(self):
mydict = lambda: defaultdict(mydict)
file_data = mydict()
first = False
self.args.log.write(
f" --- IE & EA Parameter Collection starting"
)

for file_name in self.data.keys():
for i, file_name in enumerate(self.data.keys()):
ie_data, ea_data = None, None

if "ie" in self.data[file_name].keys():
ie_data = self.parse_cc_data(file_name, self.data[file_name]["ie"])
if first == False and self.args.program=='gaussian':
self.args.log.write(f" Functional used: {ie_data.metadata['functional']}")
self.args.log.write(f" Basis set used: {ie_data.metadata['basis_set']}")
first = True

if "ea" in self.data[file_name].keys():
ea_data = self.parse_cc_data(file_name, self.data[file_name]["ea"])
if first == False and self.args.program=='gaussian':
self.args.log.write(f" Functional used: {ea_data.metadata['functional']}")
self.args.log.write(f" Basis set used: {ea_data.metadata['basis_set']}")
first = True

if i == 0:
rel_dir = self.data[file_name]["ie"].split(os.getcwd()+'/')[1].split(file_name)[0]
self.args.log.write(
f"-- IE & EA Parameter Collection from {rel_dir}"
)
self.args.log.write(f" Package used: {ea_data.metadata['package']} {ea_data.metadata['package_version']}")
self.args.log.write(f" Functional used: {ea_data.metadata['functional']}")
self.args.log.write(f" Basis set used: {ea_data.metadata['basis_set']}\n")

if ie_data != None and ea_data != None:
self.args.log.write(
f"o Parsing IE & EA data from {file_name}"
)
file_data[file_name]["ox"]["E"] = ie_data.scfenergies[-1]*eV_to_hartree
file_data[file_name]["red"]["E"] = ea_data.scfenergies[-1]*eV_to_hartree
file_data[file_name]["ox"]["E"] = ie_data.scfenergies[-1] *eV_to_hartree
file_data[file_name]["red"]["E"] = ea_data.scfenergies[-1] *eV_to_hartree

else:
self.args.log.write(
f"x Skipping file {file_name} as either IE or EA doest not exist!"
)

file_data[file_name]['cpu_time'] = datetime.timedelta(0) # initialize cpu time
for time in ie_data.metadata['cpu_time']:
file_data[file_name]['cpu_time'] += time # add cpu time from IE
for time in ea_data.metadata['cpu_time']:
file_data[file_name]['cpu_time'] += time # add cpu time from EA

return file_data

def parse_cc_data(self, file_name, file):
Expand Down
3 changes: 1 addition & 2 deletions moldscript/moldscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def main():
if args.path_nbo is not None and not args.nbo: args.nbo = True
if args.path_fukui is not None and not args.fukui: args.fukui = True
if args.path_sp_ie_ea is not None and not args.sp_ie_ea: args.sp_ie_ea = True
if args.path_ad_ie_ea is not None and not args.ad_ie_ea: args.ad_ie_ea = True

if args.link:
# ALL DATA
Expand Down Expand Up @@ -122,8 +123,6 @@ def main():

# SP IE & EA
if args.sp_ie_ea:
print(args.path_sp_ie_ea, args.suffix_sp_ie,args.suffix_sp_ea)

sp_ie_ea_read = files(calc="sp_ie_ea", path=args.path_sp_ie_ea,
suffix_sp_ie=args.suffix_sp_ie,
suffix_sp_ea=args.suffix_sp_ea,
Expand Down
10 changes: 6 additions & 4 deletions moldscript/nbo.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,6 @@ def get_data(self):
mydict = lambda: defaultdict(mydict)
file_data = mydict()

self.args.log.write(
f"-- NBO Parameter Collection starting"
)

for i, file_name in enumerate(self.data.keys()):

try:
Expand All @@ -57,6 +53,12 @@ def get_data(self):
nbo_data = None

if i == 0:
rel_dir = self.data[file_name].split(os.getcwd()+'/')[1].split(file_name)[0]

self.args.log.write(
f"-- NBO Parameter Collection from {rel_dir}"
)

self.args.log.write(f" Package used: {nbo_data.metadata['package']} {nbo_data.metadata['package_version']}")
try:
nbo_version = self.parse_nbo_version(self.data[file_name])
Expand Down
Loading