from openff.toolkit.typing.engines.smirnoff import ForceField
from openff.toolkit.topology import Molecule, Topology
from biopandas.pdb import PandasPdb
import matplotlib.pyplot as plt
from operator import itemgetter
from mendeleev import element
from simtk.openmm import app
from scipy import optimize
import subprocess as sp
from sys import stdout
import pandas as pd
import numpy as np
import statistics
import itertools
import parmed
import pickle
import shutil
import simtk
import scipy
import time
import math
import sys
import ast
import re
import os
# Unit-conversion constants used throughout the module.
#
# NOTE(review): 0.529 is the Bohr radius expressed in angstroms (angstroms
# per bohr), i.e. the inverse of what the name suggests. Confirm how callers
# apply it before relying on the name.
BOHRS_PER_ANGSTROM = 0.529
# NOTE(review): 627.509391 is the number of kcal/mol in one hartree, so this
# name also reads inverted relative to its value.
HARTREE_PER_KCAL_MOL = 627.509391
# Energy factor between kcal/mol and kJ/mol (1 kcal = 4.184 kJ). This is an
# energy-only factor: it does NOT include an angstrom^2 -> nm^2 conversion.
KCAL_MOL_PER_KJ_MOL = 4.184
# Multiply an angle in degrees by this factor to obtain radians.
RADIANS_PER_DEGREE = np.pi / 180.0
# Vibrational scaling factors keyed by "<method> <basis set>" (a single
# space separates the two parts). Looked up by get_vibrational_scaling().
method_basis_scale_dict = {
    "HF STO-3G": 0.817,
    "HF 3-21G": 0.906,
    "HF 3-21G*": 0.903,
    "HF 6-31G": 0.903,
    "HF 6-31G*": 0.899,
    "HF 6-31G**": 0.903,
    "HF 6-31+G**": 0.904,
    "HF 6-311G*": 0.904,
    "HF 6-311G**": 0.909,
    "HF TZVP": 0.909,
    "HF cc-pVDZ": 0.908,
    "HF cc-pVTZ": 0.91,
    "HF cc-pVQZ": 0.908,
    "HF aug-cc-pVDZ": 0.911,
    "HF aug-cc-pVTZ": 0.91,
    "HF aug-cc-pVQZ": 0.909,
    "HF daug-cc-pVDZ": 0.912,
    "HF daug-cc-pVTZ": 0.905,
    "ROHF 3-21G": 0.907,
    "ROHF 3-21G*": 0.909,
    "ROHF 6-31G": 0.895,
    "ROHF 6-31G*": 0.89,
    "ROHF 6-31G**": 0.855,
    "ROHF 6-31+G**": 0.856,
    "ROHF 6-311G*": 0.856,
    "ROHF 6-311G**": 0.913,
    "ROHF cc-pVDZ": 0.861,
    "ROHF cc-pVTZ": 0.901,
    "LSDA STO-3G": 0.896,
    "LSDA 3-21G": 0.984,
    "LSDA 3-21G*": 0.982,
    "LSDA 6-31G": 0.98,
    "LSDA 6-31G*": 0.981,
    "LSDA 6-31G**": 0.981,
    "LSDA 6-31+G**": 0.985,
    "LSDA 6-311G*": 0.984,
    "LSDA 6-311G**": 0.988,
    "LSDA TZVP": 0.988,
    "LSDA cc-pVDZ": 0.989,
    "LSDA cc-pVTZ": 0.989,
    "LSDA aug-cc-pVDZ": 0.989,
    "LSDA aug-cc-pVTZ": 0.991,
    "BLYP STO-3G": 0.925,
    "BLYP 3-21G": 0.995,
    "BLYP 3-21G*": 0.994,
    "BLYP 6-31G": 0.992,
    "BLYP 6-31G*": 0.992,
    "BLYP 6-31G**": 0.992,
    "BLYP 6-31+G**": 0.995,
    "BLYP 6-311G*": 0.998,
    "BLYP 6-311G**": 0.996,
    "BLYP TZVP": 0.998,
    "BLYP cc-pVDZ": 1.002,
    "BLYP cc-pVTZ": 0.997,
    "BLYP aug-cc-pVDZ": 0.998,
    "BLYP aug-cc-pVTZ": 0.997,
    "B1B95 STO-3G": 0.883,
    "B1B95 3-21G": 0.957,
    "B1B95 3-21G*": 0.955,
    "B1B95 6-31G": 0.954,
    "B1B95 6-31G*": 0.949,
    "B1B95 6-31G**": 0.955,
    "B1B95 6-31+G**": 0.957,
    "B1B95 6-311G*": 0.959,
    "B1B95 6-311G**": 0.96,
    "B1B95 TZVP": 0.957,
    "B1B95 cc-pVDZ": 0.961,
    "B1B95 cc-pVTZ": 0.957,
    "B1B95 aug-cc-pVDZ": 0.958,
    "B1B95 aug-cc-pVTZ": 0.959,
    "B3LYP STO-3G": 0.892,
    "B3LYP 3-21G": 0.965,
    "B3LYP 3-21G*": 0.962,
    "B3LYP 6-31G": 0.962,
    "B3LYP 6-31G*": 0.96,
    "B3LYP 6-31G**": 0.961,
    "B3LYP 6-31+G**": 0.964,
    "B3LYP 6-311G*": 0.966,
    "B3LYP 6-311G**": 0.967,
    "B3LYP TZVP": 0.965,
    "B3LYP cc-pVDZ": 0.97,
    "B3LYP cc-pVTZ": 0.967,
    "B3LYP cc-pVQZ": 0.969,
    "B3LYP aug-cc-pVDZ": 0.97,
    "B3LYP aug-cc-pVTZ": 0.968,
    "B3LYP aug-cc-pVQZ": 0.969,
    "B3PW91 STO-3G": 0.885,
    "B3PW91 3-21G": 0.961,
    "B3PW91 3-21G*": 0.959,
    "B3PW91 6-31G": 0.958,
    "B3PW91 6-31G*": 0.957,
    "B3PW91 6-31G**": 0.958,
    "B3PW91 6-31+G**": 0.96,
    "B3PW91 6-311G*": 0.963,
    "B3PW91 6-311G**": 0.963,
    "B3PW91 TZVP": 0.964,
    "B3PW91 cc-pVDZ": 0.965,
    "B3PW91 cc-pVTZ": 0.962,
    "B3PW91 aug-cc-pVDZ": 0.965,
    "B3PW91 aug-cc-pVTZ": 0.965,
    "mPW1PW91 STO-3G": 0.879,
    "mPW1PW91 3-21G": 0.955,
    "mPW1PW91 3-21G*": 0.95,
    "mPW1PW91 6-31G": 0.947,
    "mPW1PW91 6-31G*": 0.948,
    "mPW1PW91 6-31G**": 0.952,
    "mPW1PW91 6-31+G**": 0.952,
    "mPW1PW91 6-311G*": 0.954,
    "mPW1PW91 6-311G**": 0.957,
    "mPW1PW91 TZVP": 0.954,
    "mPW1PW91 cc-pVDZ": 0.958,
    "mPW1PW91 cc-pVTZ": 0.959,
    "mPW1PW91 aug-cc-pVDZ": 0.958,
    "mPW1PW91 aug-cc-pVTZ": 0.958,
    "PBEPBE STO-3G": 0.914,
    "PBEPBE 3-21G": 0.991,
    "PBEPBE 3-21G*": 0.954,
    "PBEPBE 6-31G": 0.986,
    "PBEPBE 6-31G*": 0.986,
    "PBEPBE 6-31G**": 0.986,
    "PBEPBE 6-31+G**": 0.989,
    "PBEPBE 6-311G*": 0.99,
    "PBEPBE 6-311G**": 0.991,
    "PBEPBE TZVP": 0.989,
    "PBEPBE cc-pVDZ": 0.994,
    "PBEPBE cc-pVTZ": 0.993,
    "PBEPBE aug-cc-pVDZ": 0.994,
    "PBEPBE aug-cc-pVTZ": 0.994,
    "PBE1PBE STO-3G": 0.882,
    "PBE1PBE 3-21G": 0.96,
    "PBE1PBE 3-21G*": 0.96,
    "PBE1PBE 6-31G": 0.956,
    "PBE1PBE 6-31G*": 0.95,
    "PBE1PBE 6-31G**": 0.953,
    "PBE1PBE 6-31+G**": 0.955,
    "PBE1PBE 6-311G*": 0.959,
    "PBE1PBE 6-311G**": 0.959,
    "PBE1PBE TZVP": 0.96,
    "PBE1PBE cc-pVDZ": 0.962,
    "PBE1PBE cc-pVTZ": 0.961,
    "PBE1PBE aug-cc-pVDZ": 0.962,
    "PBE1PBE aug-cc-pVTZ": 0.962,
    "HSEh1PBE STO-3G": 0.883,
    "HSEh1PBE 3-21G": 0.963,
    "HSEh1PBE 3-21G*": 0.96,
    "HSEh1PBE 6-31G": 0.957,
    "HSEh1PBE 6-31G*": 0.951,
    "HSEh1PBE 6-31G**": 0.954,
    "HSEh1PBE 6-31+G**": 0.955,
    "HSEh1PBE 6-311G*": 0.96,
    "HSEh1PBE 6-311G**": 0.96,
    "HSEh1PBE TZVP": 0.96,
    "HSEh1PBE cc-pVDZ": 0.962,
    "HSEh1PBE cc-pVTZ": 0.961,
    "HSEh1PBE aug-cc-pVDZ": 0.962,
    "HSEh1PBE aug-cc-pVTZ": 0.962,
    "TPSSh 3-21G": 0.969,
    "TPSSh 3-21G*": 0.966,
    "TPSSh 6-31G": 0.962,
    "TPSSh 6-31G*": 0.959,
    "TPSSh 6-31G**": 0.959,
    "TPSSh 6-31+G**": 0.963,
    "TPSSh 6-311G*": 0.963,
    "TPSSh TZVP": 0.964,
    "TPSSh cc-pVDZ": 0.972,
    "TPSSh cc-pVTZ": 0.968,
    "TPSSh aug-cc-pVDZ": 0.967,
    "TPSSh aug-cc-pVTZ": 0.965,
    "B97D3 3-21G": 0.983,
    "B97D3 6-31G*": 0.98,
    "B97D3 6-31+G**": 0.983,
    "B97D3 6-311G**": 0.986,
    "B97D3 TZVP": 0.986,
    "B97D3 cc-pVDZ": 0.992,
    "B97D3 cc-pVTZ": 0.986,
    "B97D3 aug-cc-pVTZ": 0.985,
    "MP2 STO-3G": 0.872,
    "MP2 3-21G": 0.955,
    "MP2 3-21G*": 0.951,
    "MP2 6-31G": 0.957,
    "MP2 6-31G*": 0.943,
    "MP2 6-31G**": 0.937,
    "MP2 6-31+G**": 0.941,
    "MP2 6-311G*": 0.95,
    "MP2 6-311G**": 0.95,
    "MP2 TZVP": 0.948,
    "MP2 cc-pVDZ": 0.953,
    "MP2 cc-pVTZ": 0.95,
    "MP2 cc-pVQZ": 0.948,
    "MP2 aug-cc-pVDZ": 0.959,
    "MP2 aug-cc-pVTZ": 0.953,
    "MP2 aug-cc-pVQZ": 0.95,
    "MP2=FULL STO-3G": 0.889,
    "MP2=FULL 3-21G": 0.955,
    "MP2=FULL 3-21G*": 0.948,
    "MP2=FULL 6-31G": 0.95,
    "MP2=FULL 6-31G*": 0.942,
    "MP2=FULL 6-31G**": 0.934,
    "MP2=FULL 6-31+G**": 0.939,
    "MP2=FULL 6-311G*": 0.947,
    "MP2=FULL 6-311G**": 0.949,
    "MP2=FULL TZVP": 0.953,
    "MP2=FULL cc-pVDZ": 0.95,
    "MP2=FULL cc-pVTZ": 0.949,
    "MP2=FULL cc-pVQZ": 0.957,
    "MP2=FULL aug-cc-pVDZ": 0.969,
    "MP2=FULL aug-cc-pVTZ": 0.951,
    "MP2=FULL aug-cc-pVQZ": 0.956,
    "MP3 STO-3G": 0.894,
    "MP3 3-21G": 0.968,
    "MP3 3-21G*": 0.965,
    "MP3 6-31G": 0.966,
    "MP3 6-31G*": 0.939,
    "MP3 6-31G**": 0.935,
    "MP3 6-31+G**": 0.931,
    "MP3 TZVP": 0.935,
    "MP3 cc-pVDZ": 0.948,
    "MP3 cc-pVTZ": 0.945,
    "MP3=FULL 6-31G*": 0.938,
    "MP3=FULL 6-31+G**": 0.932,
    "MP3=FULL TZVP": 0.934,
    "MP3=FULL cc-pVDZ": 0.94,
    "MP3=FULL cc-pVTZ": 0.933,
    "B2PLYP 6-31G*": 0.949,
    "B2PLYP 6-31+G**": 0.952,
    "B2PLYP TZVP": 0.954,
    "B2PLYP cc-pVDZ": 0.958,
    "B2PLYP cc-pVTZ": 0.959,
    "B2PLYP cc-pVQZ": 0.957,
    "B2PLYP aug-cc-pVTZ": 0.961,
    "B2PLYP=FULL 3-21G": 0.952,
    "B2PLYP=FULL 6-31G*": 0.948,
    "B2PLYP=FULL 6-31+G**": 0.951,
    "B2PLYP=FULL TZVP": 0.954,
    "B2PLYP=FULL cc-pVDZ": 0.959,
    "B2PLYP=FULL cc-pVTZ": 0.956,
    "B2PLYP=FULL aug-cc-pVDZ": 0.962,
    "B2PLYP=FULL aug-cc-pVTZ": 0.959,
    "CID 3-21G": 0.932,
    "CID 3-21G*": 0.931,
    "CID 6-31G": 0.935,
    "CID 6-31G*": 0.924,
    "CID 6-31G**": 0.924,
    "CID 6-31+G**": 0.924,
    "CID 6-311G*": 0.929,
    "CID cc-pVDZ": 0.924,
    "CID cc-pVTZ": 0.927,
    "CISD 3-21G": 0.941,
    "CISD 3-21G*": 0.934,
    "CISD 6-31G": 0.938,
    "CISD 6-31G*": 0.926,
    "CISD 6-31G**": 0.918,
    "CISD 6-31+G**": 0.922,
    "CISD 6-311G*": 0.925,
    "CISD cc-pVDZ": 0.922,
    "CISD cc-pVTZ": 0.93,
    "QCISD 3-21G": 0.969,
    "QCISD 3-21G*": 0.961,
    "QCISD 6-31G": 0.964,
    "QCISD 6-31G*": 0.952,
    "QCISD 6-31G**": 0.941,
    "QCISD 6-31+G**": 0.945,
    "QCISD 6-311G*": 0.957,
    "QCISD 6-311G**": 0.954,
    "QCISD TZVP": 0.955,
    "QCISD cc-pVDZ": 0.959,
    "QCISD cc-pVTZ": 0.956,
    "QCISD aug-cc-pVDZ": 0.969,
    "QCISD aug-cc-pVTZ": 0.962,
    "CCD 3-21G": 0.972,
    "CCD 3-21G*": 0.957,
    "CCD 6-31G": 0.96,
    "CCD 6-31G*": 0.947,
    "CCD 6-31G**": 0.938,
    "CCD 6-31+G**": 0.942,
    "CCD 6-311G*": 0.955,
    "CCD 6-311G**": 0.955,
    "CCD TZVP": 0.948,
    "CCD cc-pVDZ": 0.957,
    "CCD cc-pVTZ": 0.934,
    "CCD aug-cc-pVDZ": 0.965,
    "CCD aug-cc-pVTZ": 0.957,
    "CCSD 3-21G": 0.943,
    "CCSD 3-21G*": 0.943,
    "CCSD 6-31G": 0.943,
    "CCSD 6-31G*": 0.944,
    "CCSD 6-31G**": 0.933,
    "CCSD 6-31+G**": 0.934,
    "CCSD 6-311G*": 0.954,
    "CCSD TZVP": 0.954,
    "CCSD cc-pVDZ": 0.947,
    "CCSD cc-pVTZ": 0.941,
    "CCSD cc-pVQZ": 0.951,
    "CCSD aug-cc-pVDZ": 0.963,
    "CCSD aug-cc-pVTZ": 0.956,
    "CCSD aug-cc-pVQZ": 0.953,
    "CCSD=FULL 6-31G*": 0.95,
    "CCSD=FULL TZVP": 0.948,
    "CCSD=FULL cc-pVTZ": 0.948,
    "CCSD=FULL aug-cc-pVTZ": 0.951,
}
# Periodic-table lookup rows of [atomic number, symbol, element name].
# The number and symbol are strings padded with a trailing space to a width
# of two characters (e.g. "1 ", "H "); the table covers elements 1-99.
element_list = [
    ["1 ", "H ", "Hydrogen"],
    ["2 ", "He", "Helium"],
    ["3 ", "Li", "Lithium"],
    ["4 ", "Be", "Beryllium"],
    ["5 ", "B ", "Boron"],
    ["6 ", "C ", "Carbon"],
    ["7 ", "N ", "Nitrogen"],
    ["8 ", "O ", "Oxygen"],
    ["9 ", "F ", "Fluorine"],
    ["10", "Ne", "Neon"],
    ["11", "Na", "Sodium"],
    ["12", "Mg", "Magnesium"],
    ["13", "Al", "Aluminum"],
    ["14", "Si", "Silicon"],
    ["15", "P ", "Phosphorus"],
    ["16", "S ", "Sulfur"],
    ["17", "Cl", "Chlorine"],
    ["18", "Ar", "Argon"],
    ["19", "K ", "Potassium"],
    ["20", "Ca", "Calcium"],
    ["21", "Sc", "Scandium"],
    ["22", "Ti", "Titanium"],
    ["23", "V ", "Vanadium"],
    ["24", "Cr", "Chromium"],
    ["25", "Mn", "Manganese"],
    ["26", "Fe", "Iron"],
    ["27", "Co", "Cobalt"],
    ["28", "Ni", "Nickel"],
    ["29", "Cu", "Copper"],
    ["30", "Zn", "Zinc"],
    ["31", "Ga", "Gallium"],
    ["32", "Ge", "Germanium"],
    ["33", "As", "Arsenic"],
    ["34", "Se", "Selenium"],
    ["35", "Br", "Bromine"],
    ["36", "Kr", "Krypton"],
    ["37", "Rb", "Rubidium"],
    ["38", "Sr", "Strontium"],
    ["39", "Y ", "Yttrium"],
    ["40", "Zr", "Zirconium"],
    ["41", "Nb", "Niobium"],
    ["42", "Mo", "Molybdenum"],
    ["43", "Tc", "Technetium"],
    ["44", "Ru", "Ruthenium"],
    ["45", "Rh", "Rhodium"],
    ["46", "Pd", "Palladium"],
    ["47", "Ag", "Silver"],
    ["48", "Cd", "Cadmium"],
    ["49", "In", "Indium"],
    ["50", "Sn", "Tin"],
    ["51", "Sb", "Antimony"],
    ["52", "Te", "Tellurium"],
    ["53", "I ", "Iodine"],
    ["54", "Xe", "Xenon"],
    ["55", "Cs", "Cesium"],
    ["56", "Ba", "Barium"],
    ["57", "La", "Lanthanum"],
    ["58", "Ce", "Cerium"],
    ["59", "Pr", "Praseodymium"],
    ["60", "Nd", "Neodymium"],
    ["61", "Pm", "Promethium"],
    ["62", "Sm", "Samarium"],
    ["63", "Eu", "Europium"],
    ["64", "Gd", "Gadolinium"],
    ["65", "Tb", "Terbium"],
    ["66", "Dy", "Dysprosium"],
    ["67", "Ho", "Holmium"],
    ["68", "Er", "Erbium"],
    ["69", "Tm", "Thulium"],
    ["70", "Yb", "Ytterbium"],
    ["71", "Lu", "Lutetium"],
    ["72", "Hf", "Hafnium"],
    ["73", "Ta", "Tantalum"],
    ["74", "W ", "Tungsten"],
    ["75", "Re", "Rhenium"],
    ["76", "Os", "Osmium"],
    ["77", "Ir", "Iridium"],
    ["78", "Pt", "Platinum"],
    ["79", "Au", "Gold"],
    ["80", "Hg", "Mercury"],
    ["81", "Tl", "Thallium"],
    ["82", "Pb", "Lead"],
    ["83", "Bi", "Bismuth"],
    ["84", "Po", "Polonium"],
    ["85", "At", "Astatine"],
    ["86", "Rn", "Radon"],
    ["87", "Fr", "Francium"],
    ["88", "Ra", "Radium"],
    ["89", "Ac", "Actinium"],
    ["90", "Th", "Thorium"],
    ["91", "Pa", "Protactinium"],
    ["92", "U ", "Uranium"],
    ["93", "Np", "Neptunium"],
    ["94", "Pu", "Plutonium"],
    ["95", "Am", "Americium"],
    ["96", "Cm", "Curium"],
    ["97", "Bk", "Berkelium"],
    ["98", "Cf", "Californium"],
    ["99", "Es", "Einsteinium"],
]
def get_vibrational_scaling(functional, basis_set):
    """
    Look up the vibrational scaling factor for a QM method / basis set pair.

    The lookup key is the functional and the basis set joined by a single
    space, matching the keys of ``method_basis_scale_dict``.

    Parameters
    ----------
    functional: str
        Functional (QM method), e.g. ``"B3LYP"``.

    basis_set: str
        Basis set, e.g. ``"6-31G*"``.

    Returns
    -------
    vib_scale: float or None
        Scaling factor stored for the combination, or ``None`` when the
        combination is not present in ``method_basis_scale_dict``.

    Examples
    --------
    >>> get_vibrational_scaling("QCISD", "6-311G*")
    0.957

    """
    lookup_key = " ".join((functional, basis_set))
    return method_basis_scale_dict.get(lookup_key)
def unit_vector_N(u_BC, u_AB):
    """
    Compute the unit vector normal to the plane spanned by two bond vectors.

    Parameters
    ----------
    u_BC : (.. , 1, 3) array
        Unit vector from atom B to atom C.

    u_AB : (..., 1, 3) array
        Unit vector from atom A to atom B.

    Returns
    -------
    u_N : (..., 1, 3) array
        Normalised cross product ``u_BC x u_AB``, i.e. the unit normal of
        plane ABC. The result is not finite when the two inputs are
        parallel, because their cross product has zero length.

    Examples
    --------
    >>> u_BC = [0.34040355, 0.62192853, 0.27011169]
    >>> u_AB = [0.28276792, 0.34232697, 0.02370306]
    >>> unit_vector_N(u_BC, u_AB)
    array([-0.65161629, 0.5726879 , -0.49741811])

    """
    normal = np.cross(u_BC, u_AB)
    return normal / np.linalg.norm(normal)
def delete_guest_angle_params(guest_qm_params_file="guest_qm_params.txt"):
    """
    Remove the angle-parameter section from a guest QM parameter file.

    The section is delimited by the lines containing
    "Begin writing the Angle Parameters" and
    "Finish writing the Angle Parameters". Both marker lines and everything
    between them are dropped, and the file is rewritten in place.

    Parameters
    ----------
    guest_qm_params_file : str
        Path of the parameter file to edit in place.

    Raises
    ------
    ValueError
        If either marker line is missing; the file is left untouched.

    """
    with open(guest_qm_params_file, "r") as f_params:
        lines_params = f_params.readlines()
    to_begin = None
    to_end = None
    # When a marker occurs more than once, the last occurrence wins.
    for i, line in enumerate(lines_params):
        if "Begin writing the Angle Parameters" in line:
            to_begin = i
        if "Finish writing the Angle Parameters" in line:
            to_end = i
    if to_begin is None or to_end is None:
        raise ValueError(
            "Angle parameter markers not found in " + str(guest_qm_params_file)
        )
    lines_selected = lines_params[:to_begin] + lines_params[to_end + 1 :]
    with open(guest_qm_params_file, "w") as f_:
        f_.write("".join(lines_selected))
    return
def remove_bad_angle_params(
    guest_qm_params_file="guest_qm_params.txt", angle=1.00, k_angle=500
):
    """
    Drop out-of-range entries from the angle section of a parameter file.

    Only lines strictly between the "Begin writing the Angle Parameters"
    and "Finish writing the Angle Parameters" marker lines are examined.
    A line is removed when the first number found on it is smaller than
    ``angle`` or the second number found on it is larger than ``k_angle``.
    All other lines, including the markers and everything outside the
    section, are written back unchanged.

    Parameters
    ----------
    guest_qm_params_file : str
        Path of the parameter file to edit in place.

    angle : float
        Lower bound for the first number on an angle line.

    k_angle : float
        Upper bound for the second number on an angle line.

    Raises
    ------
    ValueError
        If either marker line is missing; the file is left untouched.

    """
    with open(guest_qm_params_file, "r") as f_params:
        lines_params = f_params.readlines()
    to_begin = None
    to_end = None
    # When a marker occurs more than once, the last occurrence wins.
    for i, line in enumerate(lines_params):
        if "Begin writing the Angle Parameters" in line:
            to_begin = i
        if "Finish writing the Angle Parameters" in line:
            to_end = i
    if to_begin is None or to_end is None:
        raise ValueError(
            "Angle parameter markers not found in " + str(guest_qm_params_file)
        )
    number_pattern = re.compile(r"[-+]?\d+[.]?\d*")
    min_angle = float(angle)
    max_k_angle = float(k_angle)
    kept_lines = []
    for i, line in enumerate(lines_params):
        # Filter by position, not by value: an identical line outside the
        # angle section must never be removed in place of the bad entry.
        if to_begin < i < to_end:
            numbers = number_pattern.findall(line)
            too_small = bool(numbers) and float(numbers[0]) < min_angle
            too_stiff = len(numbers) > 1 and float(numbers[1]) > max_k_angle
            if too_small or too_stiff:
                continue
        kept_lines.append(line)
    with open(guest_qm_params_file, "w") as file:
        file.writelines(kept_lines)
def get_num_host_atoms(host_pdb):
    """
    Count the atoms listed in the ATOM records of the host PDB file.

    Parameters
    ----------
    host_pdb : str
        Path of the host PDB file.

    Returns
    -------
    no_host_atoms : int
        Number of rows in the "ATOM" table parsed from the file (other
        record tables are not counted).

    """
    host_structure = PandasPdb()
    host_structure.read_pdb(host_pdb)
    atom_records = host_structure.df["ATOM"]
    return atom_records.shape[0]
def change_names(inpcrd_file, prmtop_file, pdb_file):
    """
    Copy the system files to the standard ``system_qmmmrebind.*`` names.

    The copies are created in the current working directory as
    ``system_qmmmrebind.inpcrd``, ``system_qmmmrebind.prmtop`` and
    ``system_qmmmrebind.pdb``. Files are copied with ``shutil.copy`` rather
    than through a shell command, so paths containing spaces or shell
    metacharacters are handled correctly.

    Parameters
    ----------
    inpcrd_file : str
        Path of the coordinate (inpcrd) file.

    prmtop_file : str
        Path of the topology (prmtop) file.

    pdb_file : str
        Path of the PDB file.

    """
    copies = (
        (inpcrd_file, "system_qmmmrebind.inpcrd"),
        (prmtop_file, "system_qmmmrebind.prmtop"),
        (pdb_file, "system_qmmmrebind.pdb"),
    )
    for source, destination in copies:
        try:
            shutil.copy(source, destination)
        except shutil.SameFileError:
            # The input already carries the standard name; nothing to copy.
            pass
def copy_file(source, destination):
    """
    Copy a single file to a new location using ``shutil.copy``.

    Parameters
    ----------
    source : str
        Path of the file to copy.

    destination : str
        Target path (file name or existing directory).

    """
    shutil.copy(src=source, dst=destination)
def get_openmm_energies(system_pdb, system_xml):
    """
    Print the decomposed OpenMM potential energies of a system.

    The serialized system is loaded from ``system_xml``, every force is
    placed in its own force group, and the potential energy of each group
    is evaluated at the coordinates of ``system_pdb``. A table with the
    bond, angle, torsion and nonbonded terms is printed; nothing is
    returned.

    Parameters
    ----------
    system_pdb : str
        Input PDB file

    system_xml : str
        Forcefield file in XML format

    Raises
    ------
    IndexError
        If the system lacks one of HarmonicBondForce, HarmonicAngleForce,
        PeriodicTorsionForce or NonbondedForce.

    """
    pdb = simtk.openmm.app.PDBFile(system_pdb)
    with open(system_xml, "r") as ff_xml_file:
        system = simtk.openmm.XmlSerializer.deserialize(ff_xml_file.read())

    # Assign the force groups BEFORE the Simulation (and its Context) is
    # created: a Context may not see changes made to the forces afterwards.
    force_group = []
    for group_index, force in enumerate(system.getForces()):
        force.setForceGroup(group_index)
        force_group.append(force.__class__.__name__)

    integrator = simtk.openmm.LangevinIntegrator(
        300 * simtk.unit.kelvin,
        1 / simtk.unit.picosecond,
        0.002 * simtk.unit.picoseconds,
    )
    simulation = simtk.openmm.app.Simulation(pdb.topology, system, integrator)
    simulation.context.setPositions(pdb.positions)

    # One energy evaluation per force group; `groups` is a bit mask.
    decomposed_energy = [
        simulation.context.getState(getEnergy=True, groups=2 ** group_index)
        .getPotentialEnergy()
        .value_in_unit(simtk.unit.kilojoule_per_mole)
        for group_index in range(len(force_group))
    ]
    df_energy_openmm = pd.DataFrame(
        list(zip(force_group, decomposed_energy)),
        columns=["Energy_term", "Energy_openmm_params"],
    )

    energy_group = [
        "HarmonicBondForce",
        "HarmonicAngleForce",
        "PeriodicTorsionForce",
        "NonbondedForce",
    ]
    # For each reported term take the first force of that class.
    energy_values = [
        df_energy_openmm.loc[
            df_energy_openmm["Energy_term"] == term, "Energy_openmm_params"
        ].values[0]
        for term in energy_group
    ]
    df_energy_open_mm = pd.DataFrame(
        list(zip(energy_group, energy_values)),
        columns=["Energy_term", "Energy_openmm_params"],
    )
    df_energy_open_mm = df_energy_open_mm.set_index("Energy_term")
    print(df_energy_open_mm)
def u_PA_from_angles(atom_A, atom_B, atom_C, coords):
    """
    Returns the unit vector lying in plane A, B, C and perpendicular to AB.

    NOTE: this module defines ``u_PA_from_angles`` a second time further
    down with the same body; the later definition is the one that remains
    bound to the name after import.

    Parameters
    ----------
    atom_A : int
        Index of atom A (left, starting from 0).

    atom_B : int
        Index of atom B (center, starting from 0).

    atom_C : int
        Index of atom C (right, starting from 0).

    coords : (..., N, 3) array
        An array which contains the coordinates of all
        the N atoms.

    """
    bond_AB = coords[atom_B, :] - coords[atom_A, :]
    u_AB = bond_AB / np.linalg.norm(bond_AB)
    bond_CB = coords[atom_B, :] - coords[atom_C, :]
    u_CB = bond_CB / np.linalg.norm(bond_CB)
    # Normal of the ABC plane crossed with AB gives the in-plane
    # direction perpendicular to AB.
    in_plane = np.cross(unit_vector_N(u_CB, u_AB), u_AB)
    return in_plane / np.linalg.norm(in_plane)
def force_angle_constant(
    atom_A,
    atom_B,
    atom_C,
    bond_lengths,
    eigenvalues,
    eigenvectors,
    coords,
    scaling_1,
    scaling_2,
):
    """
    Calculates force constant according to Equation 14 of
    Seminario calculation paper; returns angle (in kcal/mol/rad^2)
    and equilibrium angle (in degrees).

    When the bonds AB and CB are (anti)parallel the plane normal is
    undefined; that case is delegated to
    ``force_angle_constant_special_case`` with both scaling factors set
    to 1. Collinearity is detected from the length of the cross product
    of the two unit bond vectors (below 0.01).

    Parameters
    ----------
    atom_A : int
        Index of atom A (left, starting from 0).

    atom_B : int
        Index of atom B (center, starting from 0).

    atom_C : int
        Index of atom C (right, starting from 0).

    bond_lengths : (N, N) array
        An N * N array containing the bond lengths for
        all the possible pairs of atoms.

    eigenvalues : (N, N, 3) array
        A numpy array of shape (N, N, 3) containing
        eigenvalues of the hessian matrix, where N
        is the total number of atoms.

    eigenvectors : (3, 3, N, N) array
        A numpy array of shape (3, 3, N, N) containing
        eigenvectors of the hessian matrix.

    coords : (N, 3) array
        A numpy array of shape (N, 3) having the X, Y and Z
        coordinates of all N atoms.

    scaling_1 : float
        Factor to scale the projections of eigenvalues for AB.

    scaling_2 : float
        Factor to scale the projections of eigenvalues for BC.

    Returns
    -------
    k_theta : float
        Force angle constant calculated using modified
        seminario method.

    theta_0 : float
        Equilibrium angle between AB and BC, in degrees.

    """
    # Vectors along bonds calculated
    diff_AB = coords[atom_B, :] - coords[atom_A, :]
    u_AB = diff_AB / np.linalg.norm(diff_AB)
    diff_CB = coords[atom_B, :] - coords[atom_C, :]
    u_CB = diff_CB / np.linalg.norm(diff_CB)

    # If u_CB and u_AB are linearly dependent, u_N cannot be defined.
    # |u_CB x u_AB| = sin(angle), so this catches both the parallel and
    # the antiparallel arrangement regardless of the bond orientation.
    collinear_tolerance = 0.01
    if np.linalg.norm(np.cross(u_CB, u_AB)) < collinear_tolerance:
        k_theta, theta_0 = force_angle_constant_special_case(
            atom_A,
            atom_B,
            atom_C,
            bond_lengths,
            eigenvalues,
            eigenvectors,
            coords,
            1,
            1,
        )
        return k_theta, theta_0

    # Bond lengths and eigenvalues found
    bond_length_AB = bond_lengths[atom_A, atom_B]
    eigenvalues_AB = eigenvalues[atom_A, atom_B, :]
    eigenvectors_AB = eigenvectors[0:3, 0:3, atom_A, atom_B]
    bond_length_BC = bond_lengths[atom_B, atom_C]
    eigenvalues_CB = eigenvalues[atom_C, atom_B, :]
    eigenvectors_CB = eigenvectors[0:3, 0:3, atom_C, atom_B]

    # Normal vector to angle plane, and the in-plane directions
    # perpendicular to each bond
    u_N = unit_vector_N(u_CB, u_AB)
    u_PA = np.cross(u_N, u_AB)
    u_PA = u_PA / np.linalg.norm(u_PA)
    u_PC = np.cross(u_CB, u_N)
    u_PC = u_PC / np.linalg.norm(u_PC)

    # Projections of eigenvalues
    sum_first = 0
    sum_second = 0
    for i in range(0, 3):
        eig_AB_i = eigenvectors_AB[:, i]
        eig_BC_i = eigenvectors_CB[:, i]
        sum_first = sum_first + (
            eigenvalues_AB[i] * abs(dot_product(u_PA, eig_AB_i))
        )
        sum_second = sum_second + (
            eigenvalues_CB[i] * abs(dot_product(u_PC, eig_BC_i))
        )

    # Scaling due to additional angles - Modified Seminario Part
    sum_first = sum_first / scaling_1
    sum_second = sum_second / scaling_2

    # Added as two springs in series
    k_theta = (1 / ((bond_length_AB ** 2) * sum_first)) + (
        1 / ((bond_length_BC ** 2) * sum_second)
    )
    k_theta = 1 / k_theta
    k_theta = abs(k_theta * 0.5)  # Change to OPLS form

    # Equilibrium angle; clip guards acos against rounding just outside
    # the [-1, 1] domain.
    cos_theta = np.clip(np.dot(u_AB, u_CB), -1.0, 1.0)
    theta_0 = math.degrees(math.acos(cos_theta))
    return k_theta, theta_0
def dot_product(u_PA, eig_AB):
    """
    Returns the dot product of two 3-component vectors, conjugating the
    components of the second one.

    Parameters
    ----------
    u_PA : (..., 1, 3) array
        Unit vector perpendicular to AB and in the
        plane of A, B, C.

    eig_AB : (..., 3, 3) array
        Eigenvectors of the hessian matrix for
        the bond AB.

    """
    return sum(u_PA[axis] * eig_AB[axis].conjugate() for axis in range(3))
def force_angle_constant_special_case(
    atom_A,
    atom_B,
    atom_C,
    bond_lengths,
    eigenvalues,
    eigenvectors,
    coords,
    scaling_1,
    scaling_2,
):
    """
    Calculates force constant according to Equation 14
    of Seminario calculation paper when the vectors
    u_CB and u_AB are linearly dependent and u_N cannot
    be defined. It instead takes samples of u_N on a
    (theta, phi) grid over the unit sphere (1 degree steps, theta in
    [0, 180), phi in [0, 360)) and averages the resulting force
    constants; returns angle (in kcal/mol/rad^2) and equilibrium
    angle in degrees.

    Parameters
    ----------
    atom_A : int
        Index of atom A (left, starting from 0).

    atom_B : int
        Index of atom B (center, starting from 0).

    atom_C : int
        Index of atom C (right, starting from 0).

    bond_lengths : (N, N) array
        An N * N array containing the bond lengths for
        all the possible pairs of atoms.

    eigenvalues : (N, N, 3) array
        A numpy array of shape (N, N, 3) containing
        eigenvalues of the hessian matrix, where N
        is the total number of atoms.

    eigenvectors : (3, 3, N, N) array
        A numpy array of shape (3, 3, N, N) containing
        eigenvectors of the hessian matrix.

    coords : (N, 3) array
        A numpy array of shape (N, 3) having the X, Y,
        and Z coordinates of all N atoms.

    scaling_1 : float
        Factor to scale the projections of eigenvalues for AB.
        Accepted for signature parity; not used in this special case.

    scaling_2 : float
        Factor to scale the projections of eigenvalues for BC.
        Accepted for signature parity; not used in this special case.

    Returns
    -------
    k_theta : float
        Force angle constant calculated using modified
        seminario method.

    theta_0 : float
        Equilibrium angle between AB and BC, in degrees.

    """
    # Vectors along bonds calculated
    diff_AB = coords[atom_B, :] - coords[atom_A, :]
    u_AB = diff_AB / np.linalg.norm(diff_AB)
    diff_CB = coords[atom_B, :] - coords[atom_C, :]
    u_CB = diff_CB / np.linalg.norm(diff_CB)

    # Bond lengths and eigenvalues found
    bond_length_AB = bond_lengths[atom_A, atom_B]
    eigenvalues_AB = np.asarray(eigenvalues[atom_A, atom_B, :])
    eigenvectors_AB = np.asarray(eigenvectors[0:3, 0:3, atom_A, atom_B])
    bond_length_BC = bond_lengths[atom_B, atom_C]
    eigenvalues_CB = np.asarray(eigenvalues[atom_C, atom_B, :])
    eigenvectors_CB = np.asarray(eigenvectors[0:3, 0:3, atom_C, atom_B])

    # Candidate normals u_N in spherical coordinates (r = 1): theta is the
    # polar angle and phi the azimuth. Resulting shape is (180, 360, 3).
    theta = np.radians(np.arange(180.0))[:, np.newaxis]
    phi = np.radians(np.arange(360.0))[np.newaxis, :]
    u_N = np.stack(
        [
            np.sin(theta) * np.cos(phi),
            np.sin(theta) * np.sin(phi),
            np.cos(theta) * np.ones_like(phi),
        ],
        axis=-1,
    )

    # Samples where u_N is parallel to a bond give a zero-length cross
    # product and therefore NaN; they are excluded by nanmean below.
    with np.errstate(divide="ignore", invalid="ignore"):
        u_PA = np.cross(u_N, u_AB)
        u_PA = u_PA / np.linalg.norm(u_PA, axis=-1, keepdims=True)
        u_PC = np.cross(u_CB, u_N)
        u_PC = u_PC / np.linalg.norm(u_PC, axis=-1, keepdims=True)

        # Projections of eigenvalues: for every sample, the sum over the
        # three eigenvectors e_i of lambda_i * |u . conj(e_i)|.
        sum_first = np.abs(u_PA @ np.conj(eigenvectors_AB)) @ eigenvalues_AB
        sum_second = np.abs(u_PC @ np.conj(eigenvectors_CB)) @ eigenvalues_CB

        # Added as two springs in series
        k_theta_grid = 1 / (
            (1 / ((bond_length_AB ** 2) * sum_first))
            + (1 / ((bond_length_BC ** 2) * sum_second))
        )
        k_theta_grid = np.abs(k_theta_grid * 0.5)  # Change to OPLS form

    # Force constant used is the mean over all well-defined samples.
    k_theta = np.nanmean(k_theta_grid)

    # Equilibrium angle is independent of u_N; clip guards acos against
    # rounding just outside the [-1, 1] domain.
    cos_theta = np.clip(np.dot(u_AB, u_CB), -1.0, 1.0)
    theta_0 = math.degrees(math.acos(cos_theta))
    return k_theta, theta_0
def force_constant_bond(atom_A, atom_B, eigenvalues, eigenvectors, coords):
    """
    Calculates the bond force constant for the bonds in the
    molecule according to equation 10 of seminario paper,
    given the bond atoms' indices and the corresponding
    eigenvalues, eigenvectors and coordinates matrices.

    Each eigenvalue of the A-B block is weighted by the absolute
    projection of its eigenvector onto the unit bond vector; the summed
    value is negated and halved.

    Parameters
    ----------
    atom_A : int
        Index of Atom A.

    atom_B : int
        Index of Atom B.

    eigenvalues : (N, N, 3) array
        A numpy array of shape (N, N, 3) containing eigenvalues
        of the hessian matrix, where N is the total number
        of atoms.

    eigenvectors : (3, 3, N, N) array
        A numpy array of shape (3, 3, N, N) containing the
        eigenvectors of the hessian matrix.

    coords : (N, 3) array
        A numpy array of shape (N, 3) having the X, Y, and
        Z coordinates of all N atoms.

    Returns
    --------
    k_AB : float
        Bond Force Constant value for the bond with atoms A and B.

    """
    # Eigen-decomposition belonging to the A-B pair
    pair_eigenvalues = eigenvalues[atom_A, atom_B, :]
    pair_eigenvectors = eigenvectors[:, :, atom_A, atom_B]

    # Unit vector along the bond, pointing from A to B
    bond_vector = np.array(coords[atom_B, :]) - np.array(coords[atom_A, :])
    bond_direction = bond_vector / np.linalg.norm(bond_vector)

    # Eigenvalues weighted by their eigenvector's projection on the bond
    projected_sum = sum(
        pair_eigenvalues[axis]
        * abs(np.dot(bond_direction, pair_eigenvectors[:, axis]))
        for axis in range(3)
    )
    return -projected_sum * 0.5  # Convert to OPLS form
def u_PA_from_angles(atom_A, atom_B, atom_C, coords):
    """
    Returns the unit vector lying in plane A, B, C and perpendicular to AB.

    Parameters
    ----------
    atom_A : int
        Index of atom A (left, starting from 0).

    atom_B : int
        Index of atom B (center, starting from 0).

    atom_C : int
        Index of atom C (right, starting from 0).

    coords : (..., N, 3) array
        An array containing the coordinates of all the N atoms.

    Returns
    -------
    u_PA : (..., 1, 3) array
        Unit vector perpendicular to AB and in the plane of A, B, C.

    """
    center = coords[atom_B, :]
    toward_B_from_A = center - coords[atom_A, :]
    toward_B_from_C = center - coords[atom_C, :]
    u_AB = toward_B_from_A / np.linalg.norm(toward_B_from_A)
    u_CB = toward_B_from_C / np.linalg.norm(toward_B_from_C)
    # Plane normal first, then rotate into the plane perpendicular to AB.
    plane_normal = unit_vector_N(u_CB, u_AB)
    perpendicular = np.cross(plane_normal, u_AB)
    return perpendicular / np.linalg.norm(perpendicular)
def reverse_list(lst):
    """
    Return a reversed copy of a sequence; the input is left unmodified.

    Parameters
    ----------
    lst : list
        Input list.

    Returns
    -------
    reversed_list : list
        New sequence holding the elements of ``lst`` in opposite order.

    Examples
    --------
    >>> lst = [5, 4, 7, 2]
    >>> reverse_list(lst)
    [2, 7, 4, 5]

    """
    backwards = slice(None, None, -1)
    return lst[backwards]
def uniq(input_):
    """
    Remove repeated elements from a list while keeping first-seen order.

    Elements are compared by equality, so unhashable items (such as
    nested lists) are supported.

    Parameters
    ----------
    input_ : list
        Input list.

    Returns
    -------
    output : list
        List with only unique elements.

    Examples
    --------
    >>> lst = [2, 4, 2, 9, 10, 35, 10]
    >>> uniq(lst)
    [2, 4, 9, 10, 35]

    """
    seen = []
    for candidate in input_:
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen
def search_in_file(file: str, word: str) -> list:
    """
    Collect every line of a text file that contains a given string.

    Parameters
    ----------
    file : str
        Input file.

    word : str
        Search word.

    Returns
    -------
    list_of_results : list
        List of ``(line_number, line)`` tuples, where the line number
        is 1-based and the line has trailing whitespace stripped.

    """
    with open(file, "r") as handle:
        return [
            (number, text.rstrip())
            for number, text in enumerate(handle, start=1)
            if word in text
        ]
def list_to_dict(lst):
    """
    Build a dictionary from a flat list of alternating keys and values.

    Elements at even indices (0, 2, 4, ...) become keys and each is
    mapped to the element that directly follows it.

    Parameters
    ----------
    lst : list
        Input list.

    Returns
    -------
    res_dct : dict
        A dictionary with every element mapped with
        its successive element starting from index 0.

    Examples
    --------
    >>> lst = [5, 9, 3, 6, 2, 7]
    >>> list_to_dict(lst)
    {5: 9, 3: 6, 2: 7}

    """
    mapping = {}
    for key_position in range(0, len(lst), 2):
        mapping[lst[key_position]] = lst[key_position + 1]
    return mapping
def scale_list(list_):
    """
    Returns a scaled list with the minimum value
    subtracted from each element of the corresponding list.

    Parameters
    ----------
    list_ : list
        Input list.

    Returns
    -------
    scaled_list : list
        Scaled list; empty when the input is empty.

    Examples
    --------
    >>> list_ = [6, 3, 5, 11, 3, 2, 8, 6]
    >>> scale_list(list_)
    [4, 1, 3, 9, 1, 0, 6, 4]

    """
    # Compute the minimum once instead of once per element; the default
    # keeps an empty input from raising (the comprehension is then empty).
    smallest = min(list_, default=None)
    return [i - smallest for i in list_]
def list_kJ_kcal(list_):
    """
    Convert energies from kilojoules to kilocalories by dividing each
    element by 4.184.

    Parameters
    ----------
    list_ : list
        List with elements in units of kJ.

    Returns
    -------
    converted_list : list
        List with elements in units of kcal.

    Examples
    --------
    >>> list_ = [6, 3, 5]
    >>> list_kJ_kcal(list_)
    [1.4340344168260037, 0.7170172084130019, 1.1950286806883366]

    """
    kj_per_kcal = 4.184
    converted_list = []
    for energy_kj in list_:
        converted_list.append(energy_kj / kj_per_kcal)
    return converted_list
def list_hartree_kcal(list_):
    """
    Convert energies from hartree to kilocalories by multiplying each
    element by 627.5094.

    NOTE(review): the factor used here (627.5094) differs slightly from
    the module-level constant HARTREE_PER_KCAL_MOL (627.509391); confirm
    which value is intended before unifying them.

    Parameters
    ----------
    list_ : list
        List with elements in units of hartree.

    Returns
    -------
    converted_list : list
        List with elements in units of kcal.

    Examples
    --------
    >>> list_ = [6, 3, 5]
    >>> list_hartree_kcal(list_)
    [3765.0564000000004, 1882.5282000000002, 3137.547]

    """
    kcal_per_hartree = 627.5094
    converted_list = []
    for energy_hartree in list_:
        converted_list.append(energy_hartree * kcal_per_hartree)
    return converted_list
def xyz_to_pdb(xyz_file, coords_file, template_pdb, system_pdb):
    """
    Write a PDB file that combines a template PDB with XYZ coordinates.

    The first two lines of the XYZ file are skipped; the remaining lines
    are written to ``coords_file`` and parsed as whitespace-separated
    ``atom x y z`` columns. The x, y and z columns then replace the
    coordinate columns of the template's ATOM records.

    Parameters
    ----------
    xyz_file : str
        XYZ file containing the coordinates of the system.

    coords_file : str
        A text file containing the coordinates part of XYZ file.

    template_pdb : str
        A pdb file to be used as a template for the required PDB.

    system_pdb : str
        Output PDB file with the coordinates updated in the
        template pdb using XYZ file.

    """
    with open(xyz_file, "r") as xyz_handle:
        coordinate_lines = xyz_handle.readlines()[2:]
    with open(coords_file, "w") as coords_handle:
        coords_handle.writelines(coordinate_lines)

    xyz_table = pd.read_csv(coords_file, header=None, delimiter=r"\s+")
    xyz_table.columns = ["atom", "x", "y", "z"]

    template = PandasPdb()
    template.read_pdb(template_pdb)
    column_pairs = (("x_coord", "x"), ("y_coord", "y"), ("z_coord", "z"))
    for pdb_column, xyz_column in column_pairs:
        template.df["ATOM"][pdb_column] = xyz_table[xyz_column]
    template.to_pdb(system_pdb)
def generate_xml_from_pdb_sdf(system_pdb, system_sdf, system_xml):
    """
    Build an OpenMM system XML file for a molecule given as a PDB file.

    The PDB is converted to SDF with the ``obabel`` command line tool,
    the SDF is loaded as an OpenFF ``Molecule``, and the
    ``openff_unconstrained-1.0.0.offxml`` force field is used to create
    the OpenMM system that is serialized to ``system_xml``.

    Parameters
    ----------
    system_pdb : str
        Input PDB file.
    system_sdf : str
        SDF file written by ``obabel``.
    system_xml : str
        Output XML file holding the serialized OpenMM system.
    """
    conversion_command = " ".join(
        ["obabel", "-ipdb", system_pdb, "-osdf", "-O", system_sdf]
    )
    os.system(conversion_command)

    molecule = Molecule(system_sdf)
    off_force_field = ForceField("openff_unconstrained-1.0.0.offxml")
    system = off_force_field.create_openmm_system(molecule.to_topology())

    pdbfile = simtk.openmm.app.PDBFile(system_pdb)
    # The returned structure is not used afterwards; the call is kept so
    # the function performs the same work as before.
    parmed.openmm.load_topology(
        pdbfile.topology, system, xyz=pdbfile.positions
    )
    with open(system_xml, "w") as xml_handle:
        xml_handle.write(simtk.openmm.XmlSerializer.serialize(system))
def generate_xml_from_charged_pdb_sdf(
    system_pdb,
    system_init_sdf,
    system_sdf,
    num_charge_atoms,
    index_charge_atom_1,
    charge_atom_1,
    system_xml,
):
    """
    Build an OpenMM system XML file for a charged molecule given as PDB.

    The PDB is converted to SDF with ``obabel``; the last two lines of
    that SDF are replaced by a charge record, an end record and the
    ``$$$$`` terminator; the edited SDF is then parameterized with the
    ``openff_unconstrained-1.0.0.offxml`` force field.

    Parameters
    ----------
    system_pdb : str
        Input PDB file.
    system_init_sdf : str
        SDF file written by ``obabel`` (no charge information).
    system_sdf : str
        SDF file written by this function, including the charge record.
    num_charge_atoms : int
        Total number of charged atoms in the PDB.
    index_charge_atom_1 : int
        Index of the first charged atom.
    charge_atom_1 : float
        Charge on the first charged atom.
    system_xml : str
        Output XML file holding the serialized OpenMM system.
    """
    os.system(
        " ".join(
            ["obabel", "-ipdb", system_pdb, "-osdf", "-O", system_init_sdf]
        )
    )

    # Drop the last two lines of the generated SDF; they are rewritten
    # below together with the charge record.
    with open(system_init_sdf, "r") as init_handle:
        kept_lines = init_handle.readlines()[:-2]

    # NOTE(review): molfile property lines are conventionally written with
    # two spaces after the leading "M"; confirm these literals match what
    # the SDF reader expects.
    charge_fields = (num_charge_atoms, index_charge_atom_1, charge_atom_1)
    charge_record = (
        "M CHG " + " ".join(str(field) for field in charge_fields) + "\n"
    )
    with open(system_sdf, "w+") as out:
        out.writelines(kept_lines)
        out.write(charge_record)
        out.write("M END" + "\n")
        out.write("$$$$")

    molecule = Molecule(system_sdf)
    off_force_field = ForceField("openff_unconstrained-1.0.0.offxml")
    system = off_force_field.create_openmm_system(molecule.to_topology())

    pdbfile = simtk.openmm.app.PDBFile(system_pdb)
    # The returned structure is not used afterwards; the call is kept so
    # the function performs the same work as before.
    parmed.openmm.load_topology(
        pdbfile.topology, system, xyz=pdbfile.positions
    )
    with open(system_xml, "w") as xml_handle:
        xml_handle.write(simtk.openmm.XmlSerializer.serialize(system))
def get_dihedrals(qm_scan_file):
    """
    Extract the dihedral values from a torsiondrive scan file.

    Every line containing the word ``Dihedral`` is inspected and the
    first number found on it is taken as the dihedral value.

    Parameters
    ----------
    qm_scan_file : str
        Output scan file containing torsiondrive scans.

    Returns
    -------
    dihedrals : list
        Dihedral values (floats) in the order they appear in the file.
    """
    number_pattern = re.compile(r"[-+]?\d+[.]?\d*")
    dihedrals = []
    with open(qm_scan_file, "r") as scan:
        for line in scan:
            if "Dihedral" not in line:
                continue
            dihedrals.append(float(number_pattern.findall(line)[0]))
    return dihedrals
def get_qm_energies(qm_scan_file):
    """
    Extract the QM optimized energies from a torsiondrive scan file.

    Every line containing the word ``Dihedral`` is inspected and the
    second number found on it is taken as the energy.

    Parameters
    ----------
    qm_scan_file : str
        Output scan file containing torsiondrive scans.

    Returns
    -------
    qm_energies : list
        QM optimized energies (floats) in the order they appear in the
        scan file.
    """
    with open(qm_scan_file, "r") as scan:
        header_lines = [line for line in scan if "Dihedral" in line]
    qm_energies = [
        float(re.findall(r"[-+]?\d+[.]?\d*", header)[1])
        for header in header_lines
    ]
    return qm_energies
def generate_mm_pdbs(qm_scan_file, template_pdb):
    """
    Write one PDB file per frame of a torsiondrive scan file.

    Each frame starts at a line containing ``Dihedral``; the lines that
    follow it (up to, but excluding, the line just before the next
    ``Dihedral`` line) are parsed as coordinates and written into a copy
    of the template PDB. Files are named ``plus_<angle>.pdb`` for positive
    dihedrals and ``minus_<angle>.pdb`` for negative ones and are created
    in the current working directory.

    Parameters
    ----------
    qm_scan_file : str
        Output scan file containing torsiondrive scans.
    template_pdb : str
        PDB file whose ATOM coordinates are replaced for every frame.
    """
    number_pattern = re.compile(r"[-+]?\d+[.]?\d*")
    with open(qm_scan_file, "r") as scan:
        lines = scan.readlines()

    # Positions of the per-frame header lines and the dihedral on each.
    marker_indices = [
        index for index, line in enumerate(lines) if "Dihedral" in line
    ]
    dihedrals = [
        float(number_pattern.findall(lines[index])[0])
        for index in marker_indices
    ]
    # Sentinel so the last frame can be sliced like every other frame.
    frame_bounds = marker_indices + [len(lines) + 1]

    for frame, angle in enumerate(dihedrals):
        # NOTE(review): a dihedral of exactly 0 matches neither branch, so
        # the file name from the previous frame is reused (or the name is
        # undefined on the first frame) - confirm scans never contain 0.
        if angle > 0:
            pdb_file_to_write = "plus_" + str(abs(angle)) + ".pdb"
        elif angle < 0:
            pdb_file_to_write = "minus_" + str(abs(angle)) + ".pdb"
        start = frame_bounds[frame]
        stop = frame_bounds[frame + 1]
        x_coords, y_coords, z_coords = [], [], []
        for coordinate_line in lines[start + 1 : stop - 1]:
            numbers = number_pattern.findall(coordinate_line)
            x_coords.append(float(numbers[0]))
            y_coords.append(float(numbers[1]))
            z_coords.append(float(numbers[2]))
        ppdb = PandasPdb()
        ppdb.read_pdb(template_pdb)
        atom_records = ppdb.df["ATOM"]
        atom_records["x_coord"] = x_coords
        atom_records["y_coord"] = y_coords
        atom_records["z_coord"] = z_coords
        ppdb.to_pdb(pdb_file_to_write)
def remove_mm_files(qm_scan_file):
    """
    Delete the per-dihedral PDB, inpcrd and prmtop files.

    File names are rebuilt from the dihedral values in the scan file
    (``plus_<angle>`` / ``minus_<angle>``) and removed with ``rm -rf``.

    Parameters
    ----------
    qm_scan_file : str
        Output scan file containing torsiondrive scans.
    """
    mm_pdb_list = []
    for angle in get_dihedrals(qm_scan_file):
        # NOTE(review): a dihedral of exactly 0 matches neither branch and
        # falls back to the previously built name - confirm this is fine.
        if angle > 0:
            pdb_file = "plus_" + str(abs(angle)) + ".pdb"
        elif angle < 0:
            pdb_file = "minus_" + str(abs(angle)) + ".pdb"
        mm_pdb_list.append(pdb_file)
    for pdb_name in mm_pdb_list:
        stem = pdb_name[:-4]
        for target in (pdb_name, stem + ".inpcrd", stem + ".prmtop"):
            os.system("rm -rf " + target)
def get_non_torsion_mm_energy(system_pdb, load_topology, system_xml):
    """
    Sum the bond, angle and nonbonded energy terms of a system.

    The topology is built from the PDB and XML files, saved as
    ``<pdb stem>.prmtop`` / ``<pdb stem>.inpcrd``, reloaded, and its
    energy decomposition is queried for ``HarmonicBondForce``,
    ``HarmonicAngleForce`` and ``NonbondedForce``.

    Parameters
    ----------
    system_pdb : str
        System PDB file to load the openmm system topology
        and coordinates.
    load_topology : {"openmm", "parmed"}
        Argument to specify how to load the topology.
    system_xml : str
        XML force field file for the openmm system.

    Returns
    -------
    Sum of the three non-torsional energy terms of the system.
    """
    stem = system_pdb[:-4]
    system_prmtop = stem + ".prmtop"
    system_inpcrd = stem + ".inpcrd"

    if load_topology == "parmed":
        topology = parmed.load_file(system_pdb, structure=True).topology
        openmm_system = parmed.openmm.load_topology(
            topology, parmed.load_file(system_xml)
        )
    elif load_topology == "openmm":
        topology = simtk.openmm.app.PDBFile(system_pdb).topology
        openmm_system = parmed.openmm.load_topology(
            topology, parmed.load_file(system_xml)
        )

    openmm_system.save(system_prmtop, overwrite=True)
    openmm_system.coordinates = parmed.load_file(
        system_pdb, structure=True
    ).coordinates
    openmm_system.save(system_inpcrd, overwrite=True)

    parm = parmed.load_file(system_prmtop, system_inpcrd)
    prmtop_energy_decomposition = parmed.openmm.energy_decomposition_system(
        parm, parm.createSystem()
    )
    # Flatten the decomposition entries into one alternating list and
    # let list_to_dict pair each item with the one that follows it.
    flattened = [
        item
        for component in prmtop_energy_decomposition
        for item in list(component)
    ]
    energy_by_force = list_to_dict(flattened)
    force_names = ("HarmonicBondForce", "HarmonicAngleForce", "NonbondedForce")
    return sum([energy_by_force.get(name) for name in force_names])
def get_mm_potential_energies(qm_scan_file, load_topology, system_xml):
    """
    Compute the non-torsional MM energy of every per-dihedral PDB file.

    The PDB file names are rebuilt from the dihedral values in the scan
    file (``plus_<angle>.pdb`` / ``minus_<angle>.pdb``).

    Parameters
    ----------
    qm_scan_file : str
        Output scan file containing torsiondrive scans.
    load_topology : {"openmm", "parmed"}
        Argument to specify how to load the topology.
    system_xml : str
        XML file to load the openmm system.

    Returns
    -------
    mm_potential_energies : list
        Non-torsional MM energies, one per generated PDB file.
    """
    mm_pdb_list = []
    for angle in get_dihedrals(qm_scan_file):
        # NOTE(review): a dihedral of exactly 0 matches neither branch and
        # falls back to the previously built name - confirm this is fine.
        if angle > 0:
            pdb_file = "plus_" + str(abs(angle)) + ".pdb"
        elif angle < 0:
            pdb_file = "minus_" + str(abs(angle)) + ".pdb"
        mm_pdb_list.append(pdb_file)

    mm_potential_energies = []
    for mm_pdb_file in mm_pdb_list:
        mm_potential_energies.append(
            get_non_torsion_mm_energy(
                system_pdb=mm_pdb_file,
                load_topology=load_topology,
                system_xml=system_xml,
            )
        )
    return mm_potential_energies
def list_diff(list_1, list_2):
    """
    Subtract two lists element by element.

    Parameters
    ----------
    list_1 : list
        First list.
    list_2 : list
        Second list.

    Returns
    -------
    diff_list : list
        ``list_1[i] - list_2[i]`` for every position shared by both
        lists (the longer list is truncated).

    Examples
    --------
    >>> list_1 = [4, 2, 8, 3, 0, 6, 7]
    >>> list_2 = [5, 3, 1, 5, 6, 0, 4]
    >>> list_diff(list_1, list_2)
    [-1, -1, 7, -2, -6, 6, 3]
    """
    return [first - second for first, second in zip(list_1, list_2)]
def dihedral_energy(x, k1, k2, k3, k4=0):
    """
    Four-term cosine series used as the dihedral energy expression.

    Parameters
    ----------
    x : float or array_like
        Dihedral angle(s) in degrees.
    k1, k2, k3 : float
        Force constants of the periodicity 1, 2 and 3 terms.
    k4 : float, optional
        Force constant of the periodicity 4 term.

    Returns
    -------
    float or numpy.ndarray
        ``k1 (1 + cos x) + k2 (1 - cos 2x) + k3 (1 + cos 3x)
        + k4 (1 - cos 4x)`` evaluated at ``x``.
    """
    # Exact degree -> radian conversion; a truncated factor such as
    # 0.01745 leaves a residual energy at 180 degrees.
    angle = np.radians(x)
    energy_1 = k1 * (1 + np.cos(1 * angle))
    energy_2 = k2 * (1 - np.cos(2 * angle))
    energy_3 = k3 * (1 + np.cos(3 * angle))
    energy_4 = k4 * (1 - np.cos(4 * angle))
    return energy_1 + energy_2 + energy_3 + energy_4
def error_function(delta_qm, delta_mm):
    """
    Root mean squared error between two energy profiles.

    Parameters
    ----------
    delta_qm : array_like
        Reference values.
    delta_mm : array_like
        Values compared against the reference.

    Returns
    -------
    float
        Square root of the mean of the squared element-wise differences.
    """
    residuals = np.subtract(delta_qm, delta_mm)
    mean_squared_error = np.square(residuals).mean()
    return math.sqrt(mean_squared_error)
def error_function_boltzmann(delta_qm, delta_mm, T):
    """
    Boltzmann-weighted root mean squared error.

    Each squared difference is multiplied by ``exp(-delta_qm / (R T))``
    before averaging, so low-energy points dominate the error.

    Parameters
    ----------
    delta_qm : array_like
        Reference relative energies. Assumed to be in kcal/mol, the unit
        the energy conversion helpers in this module produce - confirm
        against the caller.
    delta_mm : array_like
        Relative energies compared against the reference (same unit).
    T : float
        Temperature in kelvin.

    Returns
    -------
    float
        Square root of the mean of the weighted squared differences.
    """
    # Molar gas constant in kcal / (mol K). The per-particle Boltzmann
    # constant (~3.3e-24 cal/K) cannot be combined with molar energies:
    # every weight except the one at delta_qm == 0 underflows to zero.
    gas_constant = 0.0019872043
    delta_qm = np.asarray(delta_qm, dtype=float)
    boltzmann_weights = np.exp(-delta_qm / (gas_constant * T))
    squared_error = (
        np.square(np.subtract(delta_qm, delta_mm)) * boltzmann_weights
    )
    mean_squared_error = squared_error.mean()
    return math.sqrt(mean_squared_error)
def gen_init_guess(qm_scan_file, load_topology, system_xml):
    """
    Produce an initial guess for the torsional parameters.

    The dihedral energy expression is least-squares fitted to the
    non-torsional MM energies (shifted so their minimum is zero) as a
    function of the scanned dihedral; negative fitted values are set
    to zero.

    Parameters
    ----------
    qm_scan_file : str
        Output scan file containing torsiondrive scans.
    load_topology : {"openmm", "parmed"}
        Argument to specify how to load the topology.
    system_xml : str
        XML force field file for the system.

    Returns
    -------
    k_init_guess : numpy.ndarray
        Initial guess for the four torsional parameters.
    """
    dihedral_angles = get_dihedrals(qm_scan_file)
    mm_energies = get_mm_potential_energies(
        qm_scan_file=qm_scan_file,
        load_topology=load_topology,
        system_xml=system_xml,
    )
    relative_mm_energies = scale_list(list_=mm_energies)
    k_init_guess, _covariance = scipy.optimize.curve_fit(
        dihedral_energy,
        dihedral_angles,
        relative_mm_energies,
        p0=[0.0, 0.0, 0.0, 0.0],
    )
    # Clamp negative force constants to zero.
    k_init_guess[k_init_guess < 0] = 0
    return k_init_guess
def objective_function(k_array, x, delta_qm):
    """
    Objective function for the torsional parameter fitting.

    Parameters
    ----------
    k_array : sequence
        The four force constants; elements 0-3 are passed as k1-k4.
    x : array_like
        Dihedral angles at which the energy expression is evaluated.
    delta_qm : array_like
        Reference energies the expression is compared against.

    Returns
    -------
    float
        Root mean squared error between ``delta_qm`` and the dihedral
        energy expression evaluated with ``k_array``.
    """
    k1, k2, k3, k4 = k_array[0], k_array[1], k_array[2], k_array[3]
    delta_mm = dihedral_energy(x, k1=k1, k2=k2, k3=k3, k4=k4)
    return error_function(delta_qm, delta_mm)
def fit_params(qm_scan_file, load_topology, system_xml, method):
    """
    Minimize the objective function to obtain the torsional parameters.

    Parameters
    ----------
    qm_scan_file : str
        Output scan file containing torsiondrive scans.
    load_topology : {"openmm", "parmed"}
        Argument to specify how to load the topology.
    system_xml : str
        XML force field file for the system.
    method : str
        Solver name forwarded to ``scipy.optimize.minimize``.

    Returns
    -------
    numpy.ndarray
        Optimized parameters, each bounded below by zero.
    """
    k_guess = gen_init_guess(
        qm_scan_file=qm_scan_file,
        load_topology=load_topology,
        system_xml=system_xml,
    )
    x_data = np.array(get_dihedrals(qm_scan_file))
    qm_energies_kcal = list_hartree_kcal(list_=get_qm_energies(qm_scan_file))
    delta_qm = np.array(scale_list(qm_energies_kcal))
    # Every force constant is constrained to be non-negative.
    non_negative = [(0.00, None)] * 4
    optimise = scipy.optimize.minimize(
        objective_function,
        k_guess,
        args=(x_data, delta_qm),
        method=method,
        bounds=non_negative,
    )
    return optimise.x
def get_tor_params(
    qm_scan_file, template_pdb, load_topology, system_xml, method
):
    """
    Generate the per-dihedral PDB files and fit the torsional parameters.

    Parameters
    ----------
    qm_scan_file : str
        Output scan file containing torsiondrive scans.
    template_pdb : str
        Template PDB used to write one PDB per scanned dihedral.
    load_topology : {"openmm", "parmed"}
        Argument to specify how to load the topology.
    system_xml : str
        XML force field file for the system.
    method : str
        Solver name forwarded to the optimizer.

    Returns
    -------
    opt_param : numpy.ndarray
        The fitted torsional parameters.
    """
    # The fit reads the per-dihedral PDB files, so they must exist first.
    generate_mm_pdbs(qm_scan_file=qm_scan_file, template_pdb=template_pdb)
    # fit_params recomputes the QM and MM energy profiles it needs, so
    # they are not evaluated here a second time.
    opt_param = fit_params(
        qm_scan_file=qm_scan_file,
        load_topology=load_topology,
        system_xml=system_xml,
        method=method,
    )
    return opt_param
def get_torsional_lines(
    template_pdb,
    system_xml,
    qm_scan_file,
    load_topology,
    method,
    dihedral_text_file,
):
    """
    Build the ``<Torsion .../>`` lines for the XML force field file.

    Parameters
    ----------
    template_pdb : str
        Template PDB used to write one PDB per scanned dihedral.
    system_xml : str
        XML force field file for the system.
    qm_scan_file : str
        Output scan file containing torsiondrive scans.
    load_topology : {"openmm", "parmed"}
        Argument to specify how to load the topology.
    method : str
        Solver name forwarded to the optimizer.
    dihedral_text_file : str
        Text file whose last line holds the four 1-based atom numbers of
        the dihedral.

    Returns
    -------
    tor_lines : list
        One XML line per fitted parameter; the periodicity of line ``i``
        (0-based) is ``i + 1``, the phase is ``0`` and atom indices are
        written 0-based.
    """
    opt_param = get_tor_params(
        qm_scan_file=qm_scan_file,
        template_pdb=template_pdb,
        load_topology=load_topology,
        system_xml=system_xml,
        method=method,
    )
    # Context manager so the file handle is always released.
    with open(dihedral_text_file, "r") as dihedral_text:
        dihedral_text_lines = dihedral_text.readlines()
    atom_numbers = re.findall(r"\d+", dihedral_text_lines[-1])
    # The text file is 1-based, the XML is 0-based. Indexing (rather than
    # slicing) keeps the IndexError when fewer than four numbers exist.
    atom_index = [int(atom_numbers[position]) - 1 for position in range(4)]
    atom_index_lines = (
        " "
        + " ".join(
            "p" + str(slot) + '="' + str(index) + '"'
            for slot, index in enumerate(atom_index, start=1)
        )
        + " "
    )
    tor_lines = []
    for periodicity, force_constant in enumerate(opt_param, start=1):
        tor_lines.append(
            " "
            + '<Torsion k="'
            + str(round(force_constant, 8))
            + '"'
            + atom_index_lines
            + 'periodicity="'
            + str(periodicity)
            + '" phase="0"/>'
        )
    return tor_lines
def singular_resid(pdbfile, qmmmrebind_init_file):
    """
    Write a copy of a PDB file in which every chain ID is set to ``A``.

    Parameters
    ----------
    pdbfile: str
        Input PDB file.
    qmmmrebind_init_file: str
        Output PDB file.
    """
    ppdb = PandasPdb().read_pdb(pdbfile)
    for record in ("HETATM", "ATOM"):
        ppdb.df[record]["chain_id"] = "A"
    ppdb.to_pdb(
        path=qmmmrebind_init_file, records=None, gz=False, append_newline=True
    )
def relax_init_structure(
    pdbfile,
    prmtopfile,
    qmmmrebindpdb,
    sim_output="output.pdb",
    sim_steps=100000,
):
    """
    Minimize and then simulate a structure with the given topology.

    The potential energy is printed before and after minimization. The
    trajectory written to ``sim_output`` is deleted once the run has
    finished.

    Parameters
    ----------
    pdbfile: str
        Input PDB file (initial positions).
    prmtopfile : str
        Input prmtop file.
    qmmmrebindpdb: str
        Output PDB file, written by a reporter every ``sim_steps`` steps.
    sim_output: str
        Simulation output trajectory file.
    sim_steps: int
        MD simulation steps.
    """
    openmm = simtk.openmm
    prmtop = openmm.app.AmberPrmtopFile(prmtopfile)
    pdb = openmm.app.PDBFile(pdbfile)
    system = prmtop.createSystem(
        nonbondedMethod=openmm.app.PME,
        nonbondedCutoff=1 * simtk.unit.nanometer,
        constraints=openmm.app.HBonds,
    )
    integrator = openmm.LangevinIntegrator(
        300 * simtk.unit.kelvin,
        1 / simtk.unit.picosecond,
        0.002 * simtk.unit.picoseconds,
    )
    simulation = openmm.app.Simulation(prmtop.topology, system, integrator)
    simulation.context.setPositions(pdb.positions)

    def potential_energy():
        state = simulation.context.getState(getEnergy=True)
        return state.getPotentialEnergy()

    print(potential_energy())
    simulation.minimizeEnergy(maxIterations=10000000)
    print(potential_energy())

    # Trajectory and state data are reported ten times over the run.
    report_interval = int(sim_steps / 10)
    simulation.reporters.append(
        openmm.app.PDBReporter(sim_output, report_interval)
    )
    simulation.reporters.append(
        openmm.app.StateDataReporter(
            stdout,
            report_interval,
            step=True,
            potentialEnergy=True,
            temperature=True,
        )
    )
    simulation.reporters.append(
        openmm.app.PDBReporter(qmmmrebindpdb, sim_steps)
    )
    simulation.step(sim_steps)
    os.system("rm -rf " + sim_output)
def truncate(x):
    """
    Format a number as a fixed-point string ten characters wide.

    Values whose integer part prints with one to four characters (a
    leading minus sign counts as a character) are formatted with enough
    decimals to fill ten characters. Any other value is returned as a
    number, rounded to ten decimals when its printed form is longer
    than ten characters.

    Parameters
    ----------
    x: float or str
        Input value.

    Returns
    -------
    str or float
        Ten-character string for the common case, otherwise the
        (possibly rounded) input.
    """
    value = float(x)
    integer_digits = len(str(int(value)))
    if 1 <= integer_digits <= 4:
        decimals = 9 - integer_digits
        text = format(value, ".%df" % decimals)
        if len(text) > 10:
            # Rounding carried into an extra leading character (for
            # example 9.999999999 -> 10.00000000) or a sign appeared
            # (-0.5); give up one decimal to stay at ten characters.
            text = format(value, ".%df" % (decimals - 1))
        return text
    if len(str(x)) > 10:
        x = round(x, 10)
    return x
def add_vectors_inpcrd(pdbfile, inpcrdfile):
    """
    Append the periodic box dimensions of a PDB file to an inpcrd file.

    The six numbers following the record name on the ``CRYST`` line are
    formatted with ``truncate`` and appended as one line. If several
    lines contain ``CRYST`` the last one is used.

    Parameters
    ----------
    pdbfile: str
        PDB file containing the periodic box information.
    inpcrdfile: str
        Input coordinate file the box line is appended to.

    Raises
    ------
    ValueError
        If the PDB file has no line containing ``CRYST``.
    """
    line_to_add = None
    with open(pdbfile, "r") as pdb_handle:
        for record in pdb_handle:
            if "CRYST" not in record:
                continue
            numbers = [
                float(number)
                for number in re.findall(r"[-+]?\d*\.\d+|\d+", record)
            ]
            # The first match is the digit of the record name itself.
            box = numbers[1 : 1 + 6]
            # NOTE(review): with a one-character separator every field is
            # 11 characters wide; confirm this matches the column layout
            # the inpcrd reader expects.
            line_to_add = " " + " ".join(
                truncate(box[position]) for position in range(6)
            )
    if line_to_add is None:
        raise ValueError("No CRYST record found in " + str(pdbfile))
    print(line_to_add)
    with open(inpcrdfile, "a+") as f:
        f.write(line_to_add)
def add_dim_prmtop(pdbfile, prmtopfile):
    """
    Insert a ``%FLAG BOX_DIMENSIONS`` section into a prmtop file.

    The box values come from the ``CRYST`` line of the PDB file (the last
    such line wins). The section is inserted in front of every line
    starting with ``%FLAG RADIUS_SET``; the result is written to
    ``intermediate.prmtop`` in the working directory and then moved over
    ``prmtopfile``.

    Parameters
    ----------
    pdbfile: str
        PDB file containing the periodic box information.
    prmtopfile: str
        Input prmtop file; replaced by the edited copy.

    Raises
    ------
    ValueError
        If the PDB file has no line containing ``CRYST``.
    """
    line3 = None
    with open(pdbfile, "r") as pdb_handle:
        for record in pdb_handle:
            if "CRYST" not in record:
                continue
            numbers = [
                float(number)
                for number in re.findall(r"[-+]?\d*\.\d+|\d+", record)
            ]
            # The first match is the digit of the record name itself. Each
            # value is divided by ten and given an explicit E+01 exponent.
            fields = [
                truncate(value / 10) + "E+01" for value in numbers[1 : 1 + 6]
            ]
            # Written order: fourth CRYST value first, then the first three.
            line3 = (
                " "
                + fields[3]
                + " "
                + fields[0]
                + " "
                + fields[1]
                + " "
                + fields[2]
            )
    if line3 is None:
        raise ValueError("No CRYST record found in " + str(pdbfile))
    print(line3)
    line1 = "%FLAG BOX_DIMENSIONS"
    line2 = "%FORMAT(5E16.8)"
    with open(prmtopfile) as f1, open("intermediate.prmtop", "w") as f2:
        for line in f1:
            if line.startswith("%FLAG RADIUS_SET"):
                line = line1 + "\n" + line2 + "\n" + line3 + "\n" + line
            f2.write(line)
    # Replace the original without going through a shell, so paths with
    # spaces or shell metacharacters are handled correctly.
    shutil.move("intermediate.prmtop", prmtopfile)
def add_period_prmtop(parm_file, ifbox):
    """
    Overwrite the IFBOX entry of a prmtop / parm file in place.

    The line four lines below the first ``%FLAG POINTERS`` line is
    rewritten with its eighth integer replaced by ``ifbox``. Set to 1 for
    a standard periodic box and 2 for a truncated octahedron.

    Parameters
    ----------
    parm_file : str
        Topology file to edit.
    ifbox : int
        New value written into the eighth field of that line.
    """
    with open(parm_file) as handle:
        parm_lines = handle.readlines()
    pointer_rows = [
        index + 4
        for index, text in enumerate(parm_lines)
        if text.startswith("%FLAG POINTERS")
    ]
    target = pointer_rows[0]
    fields = re.findall(r"\d+", parm_lines[target])
    values = tuple(
        [fields[position] for position in range(7)]
        + [str(ifbox), fields[8], fields[9]]
    )
    line_new = "%8s %6s %6s %6s %6s %6s %6s %6s %6s %6s" % values
    parm_lines[target] = line_new + "\n"
    with open(parm_file, "w") as handle:
        handle.writelines(parm_lines)
def add_solvent_pointers_prmtop(non_reparams_file, reparams_file):
    """
    Copy the solvent pointers section from one topology file to another.

    The line containing ``FLAG SOLVENT_POINTERS`` and the two lines after
    it are read from ``non_reparams_file`` and appended to
    ``reparams_file``. If the flag occurs more than once the last
    occurrence is used.

    Parameters
    ----------
    non_reparams_file : str
        Topology file that contains the solvent pointers section.
    reparams_file : str
        Topology file the section is appended to.

    Raises
    ------
    ValueError
        If ``non_reparams_file`` has no ``FLAG SOLVENT_POINTERS`` line.
    """
    with open(non_reparams_file, "r") as source:
        lines_non_params = source.readlines()
    to_begin = None
    for index, text in enumerate(lines_non_params):
        if "FLAG SOLVENT_POINTERS" in text:
            to_begin = index
    if to_begin is None:
        raise ValueError(
            "No SOLVENT_POINTERS flag found in " + str(non_reparams_file)
        )
    solvent_pointers = lines_non_params[to_begin : to_begin + 3]
    # Closing the handle guarantees the appended lines are flushed to disk
    # before anything else reads the file.
    with open(reparams_file, "a") as target:
        target.writelines(solvent_pointers)
def prmtop_calibration(
    prmtopfile="system_qmmmrebind.prmtop",
    inpcrdfile="system_qmmmrebind.inpcrd",
):
    """
    Standardize a topology file with two ParmEd actions.

    The ``changeRadii`` action (with ``mbondi3``) and the ``setMolecules``
    action are executed on the loaded topology, which is then saved back
    over ``prmtopfile``.

    Parameters
    ----------
    prmtopfile: str
        Input prmtop file; overwritten with the result.
    inpcrdfile: str
        Input coordinate file.
    """
    parm = parmed.load_file(prmtopfile, inpcrdfile)
    actions = parmed.tools.actions
    actions.changeRadii(parm, "mbondi3").execute()
    actions.setMolecules(parm).execute()
    parm.save(prmtopfile, overwrite=True)
def run_openmm_prmtop_inpcrd(
    pdbfile="system_qmmmrebind.pdb",
    prmtopfile="system_qmmmrebind.prmtop",
    inpcrdfile="system_qmmmrebind.inpcrd",
    sim_output="output.pdb",
    sim_steps=10000,
):
    """
    Minimize and run an OpenMM simulation from inpcrd and prmtop files.

    When the inpcrd file carries no box vectors, the box line derived
    from ``pdbfile`` is appended to it; otherwise the box vectors are
    applied to the simulation context and printed. The potential energy
    is printed before and after minimization.

    Parameters
    ----------
    pdbfile: str
        Input PDB file (source of the box dimensions if needed).
    prmtopfile: str
        Input prmtop file.
    inpcrdfile: str
        Input coordinate file.
    sim_output: str
        Output trajectory file.
    sim_steps: int
        Simulation steps.
    """
    openmm = simtk.openmm
    prmtop = openmm.app.AmberPrmtopFile(prmtopfile)
    inpcrd = openmm.app.AmberInpcrdFile(inpcrdfile)
    system = prmtop.createSystem(
        nonbondedCutoff=1 * simtk.unit.nanometer,
        constraints=openmm.app.HBonds,
    )
    integrator = openmm.LangevinIntegrator(
        300 * simtk.unit.kelvin,
        1 / simtk.unit.picosecond,
        0.002 * simtk.unit.picoseconds,
    )
    simulation = openmm.app.Simulation(prmtop.topology, system, integrator)

    if inpcrd.boxVectors is None:
        add_vectors_inpcrd(
            pdbfile=pdbfile, inpcrdfile=inpcrdfile,
        )
    else:
        simulation.context.setPeriodicBoxVectors(*inpcrd.boxVectors)
        print(inpcrd.boxVectors)
    simulation.context.setPositions(inpcrd.positions)

    def potential_energy():
        state = simulation.context.getState(getEnergy=True)
        return state.getPotentialEnergy()

    print(potential_energy())
    simulation.minimizeEnergy(maxIterations=1000000)
    print(potential_energy())

    # Trajectory and state data are reported ten times over the run.
    report_interval = int(sim_steps / 10)
    simulation.reporters.append(
        openmm.app.PDBReporter(sim_output, report_interval)
    )
    simulation.reporters.append(
        openmm.app.StateDataReporter(
            stdout,
            report_interval,
            step=True,
            potentialEnergy=True,
            temperature=True,
        )
    )
    simulation.step(sim_steps)
def run_openmm_prmtop_pdb(
    pdbfile="system_qmmmrebind.pdb",
    prmtopfile="system_qmmmrebind.prmtop",
    sim_output="output.pdb",
    sim_steps=10000,
):
    """
    Minimize and run an OpenMM simulation from PDB and prmtop files.

    The potential energy is printed before and after minimization.

    Parameters
    ----------
    pdbfile: str
        Input PDB file (initial positions).
    prmtopfile: str
        Input prmtop file.
    sim_output: str
        Output trajectory file.
    sim_steps: int
        Simulation steps.
    """
    openmm = simtk.openmm
    prmtop = openmm.app.AmberPrmtopFile(prmtopfile)
    pdb = openmm.app.PDBFile(pdbfile)
    system = prmtop.createSystem(
        nonbondedCutoff=1 * simtk.unit.nanometer,
        constraints=openmm.app.HBonds,
    )
    integrator = openmm.LangevinIntegrator(
        300 * simtk.unit.kelvin,
        1 / simtk.unit.picosecond,
        0.002 * simtk.unit.picoseconds,
    )
    simulation = openmm.app.Simulation(prmtop.topology, system, integrator)
    simulation.context.setPositions(pdb.positions)

    def potential_energy():
        state = simulation.context.getState(getEnergy=True)
        return state.getPotentialEnergy()

    print(potential_energy())
    simulation.minimizeEnergy(maxIterations=1000000)
    print(potential_energy())

    # Trajectory and state data are reported ten times over the run.
    report_interval = int(sim_steps / 10)
    simulation.reporters.append(
        openmm.app.PDBReporter(sim_output, report_interval)
    )
    simulation.reporters.append(
        openmm.app.StateDataReporter(
            stdout,
            report_interval,
            step=True,
            potentialEnergy=True,
            temperature=True,
        )
    )
    simulation.step(sim_steps)
def move_qmmmmrebind_files(
    prmtopfile="system_qmmmrebind.prmtop",
    inpcrdfile="system_qmmmrebind.inpcrd",
    pdbfile="system_qmmmrebind.pdb",
):
    """
    Copy the QMMMReBind topology and coordinate files to a fresh
    ``reparameterized_files`` directory in the working directory.

    Any existing ``reparameterized_files`` directory is removed first.
    The source files are left in place.

    Parameters
    ----------
    prmtopfile: str
        QMMMReBind generated prmtop file.
    inpcrdfile: str
        QMMMReBind generated inpcrd file.
    pdbfile: str
        QMMMReBind generated PDB file.
    """
    current_pwd = os.getcwd()
    for command in (
        "rm -rf reparameterized_files",
        "mkdir reparameterized_files",
    ):
        os.system(command)
    destination = current_pwd + "/" + "reparameterized_files"
    for file_name in (prmtopfile, inpcrdfile, pdbfile):
        shutil.copy(
            current_pwd + "/" + file_name, destination + "/" + file_name
        )
def move_qm_files():
    """
    Copy the QM engine files to a fresh ``qm_data`` directory.

    Any existing ``qm_data`` directory in the working directory is removed
    first; files matching ``*.com*``, ``*.log*``, ``*.chk*`` and
    ``*.fchk*`` are then copied into it with ``cp -r``.
    """
    current_pwd = os.getcwd()
    os.system("rm -rf qm_data")
    os.system("mkdir qm_data")
    destination = current_pwd + "/" + "qm_data"
    for pattern in ("*.com* ", "*.log* ", "*.chk* ", "*.fchk* "):
        os.system("cp -r " + pattern + destination)
def move_qmmmrebind_files():
    """
    Move all QMMMReBind files to a fresh ``qmmmrebind_data`` directory.

    Any existing ``qmmmrebind_data`` directory in the working directory is
    removed first; files matching each pattern below are then moved into
    it with ``mv``, one pattern at a time.
    """
    current_pwd = os.getcwd()
    os.system("rm -rf qmmmrebind_data")
    os.system("mkdir qmmmrebind_data")
    destination = current_pwd + "/" + "qmmmrebind_data"
    patterns = (
        "*.sdf* ",
        "*.txt* ",
        "*.pdb* ",
        "*.xml* ",
        "*.chk* ",
        "*.fchk* ",
        "*.com* ",
        "*.log* ",
        "*.inpcrd* ",
        "*.prmtop* ",
        "*.parm7* ",
        "*.out* ",
        "*run_command* ",
        "*.dat* ",
        "*.xyz* ",
    )
    for pattern in patterns:
        os.system("mv " + pattern + destination)
[docs]class PrepareQMMM:
"""
A class used to segregate the QM and MM regions.
This class contains methods to remove the solvent, ions and all
entities that are exclusive of receptor and the ligand. It also
defines the Quantum Mechanical (QM) region and the Molecular
Mechanical (MM) region based upon the distance of the ligand
from the receptor and the chosen number of receptor residues. It
is also assumed that the initial PDB file will have the receptor
followed by the ligand.
...
Attributes
----------
init_pdb : str
Initial PDB file containing the receptor-ligand complex with
solvent, ions, etc.
cleaned_pdb : str
Formatted PDB file containing only the receptor and the ligand.
guest_init_pdb : str
A separate ligand PDB file with atom numbers not beginning from 1.
host_pdb : str
A separate receptor PDB file with atom numbers beginning from 1.
guest_resname : str
Three letter residue ID for the ligand.
guest_pdb : str, optional
Ligand PDB file with atom numbers beginning from 1.
guest_xyz : str, optional
A text file of the XYZ coordinates of the ligand.
distance : float, optional
The distance required to define the QM region of the receptor.
This is the distance between the atoms of the ligand and the
atoms of the receptor.
residue_list : str, optional
A text file of the residue numbers of the receptor within the
proximity (as defined by the distance) from the ligand.
host_qm_atoms : str, optional
A text file of the atom numbers of the receptors in the QM
region.
host_mm_atoms : str, optional
A text file of the atom numbers of the receptors in the MM
region (all atoms except atoms in the QM region)
host_qm_pdb : str, optional
PDB file for the receptor's QM region.
host_mm_pdb : str, optional
PDB file for the receptor's MM region.
qm_pdb : str, optional
PDB file for the QM region (receptor's QM region and the
ligand).
mm_pdb : str, optional
PDB file for the MM region.
host_mm_region_I_atoms : str, optional
A text file of the atom numbers of the receptors in the MM
region preceding the QM region.
host_mm_region_II_atoms : str, optional
A text file of the atom numbers of the receptors in the MM
region following the QM region.
host_mm_region_I_pdb : str, optional
PDB file of the receptor in the MM region preceding the
QM region.
host_mm_region_II_pdb : str, optional
PDB file of the receptor in the MM region following the
QM region.
num_residues : int, optional
Number of residues required in the QM region of the receptor.
"""
[docs] def __init__(
self,
init_pdb,
distance,
num_residues,
guest_resname,
cleaned_pdb="system.pdb",
guest_init_pdb="guest_init.pdb",
host_pdb="host.pdb",
guest_pdb="guest_init_II.pdb",
guest_xyz="guest_coord.txt",
residue_list="residue_list.txt",
host_qm_atoms="host_qm.txt",
host_mm_atoms="host_mm.txt",
host_qm_pdb="host_qm.pdb",
host_mm_pdb="host_mm.pdb",
qm_pdb="qm.pdb",
mm_pdb="mm.pdb",
host_mm_region_I_atoms="host_mm_region_I.txt",
host_mm_region_II_atoms="host_mm_region_II.txt",
host_mm_region_I_pdb="host_mm_region_I.pdb",
host_mm_region_II_pdb="host_mm_region_II.pdb",
):
self.init_pdb = init_pdb
self.distance = distance
self.num_residues = num_residues
self.guest_resname = guest_resname
self.cleaned_pdb = cleaned_pdb
self.guest_init_pdb = guest_init_pdb
self.host_pdb = host_pdb
self.guest_pdb = guest_pdb
self.guest_xyz = guest_xyz
self.residue_list = residue_list
self.host_qm_atoms = host_qm_atoms
self.host_mm_atoms = host_mm_atoms
self.host_qm_pdb = host_qm_pdb
self.host_mm_pdb = host_mm_pdb
self.qm_pdb = qm_pdb
self.mm_pdb = mm_pdb
self.host_mm_region_I_atoms = host_mm_region_I_atoms
self.host_mm_region_II_atoms = host_mm_region_II_atoms
self.host_mm_region_I_pdb = host_mm_region_I_pdb
self.host_mm_region_II_pdb = host_mm_region_II_pdb
[docs] def clean_up(self):
"""
Reads the given PDB file, removes all entities except the
receptor and ligand and saves a new pdb file.
"""
ions = [
"Na+",
"Cs+",
"K+",
"Li+",
"Rb+",
"Cl-",
"Br-",
"F-",
"I-",
"Ca2",
]
intermediate_file_1 = self.cleaned_pdb[:-4] + "_intermediate_1.pdb"
intermediate_file_2 = self.cleaned_pdb[:-4] + "_intermediate_2.pdb"
command = (
"pdb4amber -i "
+ self.init_pdb
+ " -o "
+ intermediate_file_1
+ " --noter --dry"
)
os.system(command)
to_delete = (
intermediate_file_1[:-4] + "_nonprot.pdb",
intermediate_file_1[:-4] + "_renum.txt",
intermediate_file_1[:-4] + "_sslink",
intermediate_file_1[:-4] + "_water.pdb",
)
os.system("rm -rf " + " ".join(to_delete))
with open(intermediate_file_1) as f1, open(
intermediate_file_2, "w") as f2:
for line in f1:
if not any(ion in line for ion in ions):
f2.write(line)
with open(intermediate_file_2, "r") as f1:
filedata = f1.read()
filedata = filedata.replace("HETATM", "ATOM ")
with open(self.cleaned_pdb, "w") as f2:
f2.write(filedata)
command = "rm -rf " + intermediate_file_1 + " " + intermediate_file_2
os.system(command)
[docs] def create_host_guest(self):
"""
Saves separate receptor and ligand PDB files.
"""
with open(self.cleaned_pdb) as f1, open(self.host_pdb, "w") as f2:
for line in f1:
if not self.guest_resname in line and not "CRYST1" in line:
f2.write(line)
with open(self.cleaned_pdb) as f1, open(
self.guest_init_pdb, "w"
) as f2:
for line in f1:
if self.guest_resname in line or "END" in line:
f2.write(line)
[docs] def realign_guest(self):
"""
Saves a ligand PDB file with atom numbers beginning from 1.
"""
ppdb = PandasPdb()
ppdb.read_pdb(self.guest_init_pdb)
to_subtract = min(ppdb.df["ATOM"]["atom_number"]) - 1
ppdb.df["ATOM"]["atom_number"] = (
ppdb.df["ATOM"]["atom_number"] - to_subtract
)
intermediate_file_1 = self.guest_pdb[:-4] + "_intermediate_1.pdb"
intermediate_file_2 = self.guest_pdb[:-4] + "_intermediate_2.pdb"
ppdb.to_pdb(path=intermediate_file_1)
command = (
"pdb4amber -i "
+ intermediate_file_1
+ " -o "
+ intermediate_file_2
)
os.system(command)
to_delete = (
intermediate_file_2[:-4] + "_nonprot.pdb",
intermediate_file_2[:-4] + "_renum.txt",
intermediate_file_2[:-4] + "_sslink",
)
os.system("rm -rf " + " ".join(to_delete))
with open(intermediate_file_2, "r") as f1:
filedata = f1.read()
filedata = filedata.replace("HETATM", "ATOM ")
with open(self.guest_pdb, "w") as f2:
f2.write(filedata)
command = "rm -rf " + intermediate_file_1 + " " + intermediate_file_2
os.system(command)
[docs] def get_guest_coord(self):
"""
Saves a text file of the XYZ coordinates of the ligand.
"""
ppdb = PandasPdb()
ppdb.read_pdb(self.guest_pdb)
xyz = ppdb.df["ATOM"][["x_coord", "y_coord", "z_coord"]]
xyz_to_list = xyz.values.tolist()
np.savetxt(self.guest_xyz, xyz_to_list)
[docs] def get_qm_resids(self):
"""
Saves a text file of the residue numbers of the receptor within the
proximity (as defined by the distance) from the ligand.
"""
guest_coord_list = np.loadtxt(self.guest_xyz)
host_atom_list = []
for i in range(len(guest_coord_list)):
reference_point = guest_coord_list[i]
# TODO: move reads outside of loop
ppdb = PandasPdb()
ppdb.read_pdb(self.host_pdb)
distances = ppdb.distance(xyz=reference_point, records=("ATOM"))
all_within_distance = ppdb.df["ATOM"][
distances < float(self.distance)
]
host_df = all_within_distance["atom_number"]
host_list = host_df.values.tolist()
host_atom_list.append(host_list)
host_atom_list = list(itertools.chain(*host_atom_list))
host_atom_list = set(host_atom_list)
host_atom_list = list(host_atom_list)
host_atom_list.sort()
ppdb = PandasPdb()
ppdb.read_pdb(self.host_pdb)
df = ppdb.df["ATOM"][["atom_number", "residue_number", "residue_name"]]
index_list = []
for i in host_atom_list:
indices = np.where(df["atom_number"] == i)
indices = list(indices)[0]
indices = list(indices)
index_list.append(indices)
index_list = list(itertools.chain.from_iterable(index_list))
df1 = df.iloc[
index_list,
]
# TODO: make it write list of integers
resid_num = list(df1.residue_number.unique())
np.savetxt(self.residue_list, resid_num, fmt="%i")
def get_host_qm_mm_atoms(self):
    """
    Saves a text file of the atom numbers of the receptors in the QM
    region and MM region separately.

    The QM region is a contiguous window of residue numbers centred on
    the median of the residue numbers stored in ``self.residue_list``:
    ``range(median - half, median + half)`` with
    ``half = int(self.num_residues / 2)``, so the upper bound is excluded.
    Atom numbers of ATOM records in ``self.host_pdb`` whose residue falls
    in that window are written to ``self.host_qm_atoms``; the remaining
    numbers from ``1 .. number of ATOM records`` are written to
    ``self.host_mm_atoms``.
    """
    # ndmin=1 keeps a file holding a single residue number iterable.
    resid_num = np.loadtxt(self.residue_list, ndmin=1)
    centre = int(statistics.median(resid_num))
    half_width = int(int(self.num_residues) / 2)
    approximated_res_list = list(
        range(centre - half_width, centre + half_width)
    )
    ppdb = PandasPdb()
    ppdb.read_pdb(self.host_pdb)
    df = ppdb.df["ATOM"][["atom_number", "residue_number", "residue_name"]]
    # Stable sort: atoms are grouped by ascending residue number and keep
    # their file order inside each residue.
    qm_rows = df[
        df["residue_number"].isin(approximated_res_list)
    ].sort_values("residue_number", kind="mergesort")
    selected_atoms = qm_rows["atom_number"].tolist()
    # NOTE(review): the MM list is derived from 1..N, i.e. it assumes the
    # ATOM records are numbered consecutively from 1 - confirm for inputs
    # that do not follow this numbering.
    len_atoms = range(1, len(ppdb.df["ATOM"]) + 1)
    # Sorted so the output file does not depend on set iteration order.
    non_selected_atoms = sorted(set(len_atoms).difference(selected_atoms))
    assert len(non_selected_atoms) + len(selected_atoms) == len(len_atoms),\
        "Sum of the atoms in the selected and non-selected region "\
        "does not equal the length of list of total atoms."
    np.savetxt(self.host_qm_atoms, selected_atoms, fmt="%i")
    np.savetxt(self.host_mm_atoms, non_selected_atoms, fmt="%i")
def save_host_pdbs(self):
    """
    Saves a PDB file for the receptor's QM region and MM
    region separately.

    Each output is produced by removing atoms from ``self.host_pdb``:
    dropping the atom numbers listed in ``self.host_qm_atoms`` yields
    ``self.host_mm_pdb``, and dropping those listed in
    ``self.host_mm_atoms`` yields ``self.host_qm_pdb``.
    """
    jobs = (
        (self.host_qm_atoms, self.host_mm_pdb),
        (self.host_mm_atoms, self.host_qm_pdb),
    )
    for atoms_to_drop_file, output_pdb in jobs:
        # ndmin=1 keeps a one-entry file iterable; astype(int) truncates
        # like int() for the float values loadtxt returns.
        atoms_to_drop = np.loadtxt(atoms_to_drop_file, ndmin=1).astype(int)
        ppdb = PandasPdb()
        ppdb.read_pdb(self.host_pdb)
        atom_df = ppdb.df["ATOM"]
        # One vectorised mask instead of re-filtering the frame per atom.
        ppdb.df["ATOM"] = atom_df[
            ~atom_df["atom_number"].isin(atoms_to_drop)
        ]
        ppdb.to_pdb(
            path=output_pdb, records=None, gz=False, append_newline=True,
        )
def get_host_mm_region_atoms(self):
    """
    Saves a text file for the atoms of the receptor's MM region
    preceding the QM region and saves another text file for the
    atoms of the receptor's MM region following the QM region.

    The QM residue window is rebuilt exactly as in
    ``get_host_qm_mm_atoms`` (median of ``self.residue_list`` plus/minus
    ``int(self.num_residues / 2)``, upper bound excluded). Residues of
    ``self.host_pdb`` outside that window are split into region I
    (residue number below the largest window residue) and region II (the
    rest). Atom numbers are then looked up in ``self.host_mm_pdb`` and
    written to ``self.host_mm_region_I_atoms`` and
    ``self.host_mm_region_II_atoms``.
    """
    # ndmin=1 keeps a file holding a single residue number iterable.
    resid_num = np.loadtxt(self.residue_list, ndmin=1)
    centre = int(statistics.median(resid_num))
    half_width = int(int(self.num_residues) / 2)
    approximated_res_list = list(
        range(centre - half_width, centre + half_width)
    )
    ppdb = PandasPdb()
    ppdb.read_pdb(self.host_pdb)
    res_list = set(ppdb.df["ATOM"]["residue_number"].to_list())
    res_mm_list = res_list.difference(approximated_res_list)
    # "i < j for some j in the window" is the same as "i < max(window)",
    # so one comparison per residue replaces the nested loop. An empty
    # window leaves region I empty, as the nested loop did.
    if approximated_res_list:
        window_top = max(approximated_res_list)
        res_mm_region_I_list = sorted(
            res for res in res_mm_list if res < window_top
        )
    else:
        res_mm_region_I_list = []
    res_mm_region_II_list = sorted(
        res_mm_list.difference(res_mm_region_I_list)
    )
    ppdb.read_pdb(self.host_mm_pdb)
    df = ppdb.df["ATOM"][["atom_number", "residue_number", "residue_name"]]

    def _atoms_in(residues):
        # Atom numbers of the given residues, grouped by ascending residue
        # number and in file order inside each residue (stable sort).
        rows = df[df["residue_number"].isin(residues)]
        rows = rows.sort_values("residue_number", kind="mergesort")
        return rows["atom_number"].tolist()

    mm_region_I_atoms = _atoms_in(res_mm_region_I_list)
    mm_region_II_atoms = _atoms_in(res_mm_region_II_list)
    # Every ATOM record of the MM PDB must land in exactly one region.
    total_mm_atoms = len(df)
    assert len(mm_region_I_atoms) + len(mm_region_II_atoms) == total_mm_atoms,\
        "Sum of the atoms in the selected and non-selected region "\
        "does not equal the length of list of total atoms."
    np.savetxt(self.host_mm_region_I_atoms, mm_region_I_atoms, fmt="%i")
    np.savetxt(self.host_mm_region_II_atoms, mm_region_II_atoms, fmt="%i")
def save_host_mm_regions_pdbs(self):
    """
    Saves a PDB file for the receptor's MM region preceding
    the QM region and saves another PDB file for the receptor's
    MM region following the QM region.

    Both files are derived from ``self.host_mm_pdb``: region I is what is
    left after removing the atoms listed in
    ``self.host_mm_region_II_atoms`` and is written to
    ``self.host_mm_region_I_pdb``; region II is what is left after
    removing the atoms listed in ``self.host_mm_region_I_atoms`` and is
    written to ``self.host_mm_region_II_pdb``.
    """
    # ndmin=1 keeps one-entry files iterable; astype(int) truncates like
    # int() for the float values loadtxt returns.
    mm_region_I_atoms = np.loadtxt(
        self.host_mm_region_I_atoms, ndmin=1
    ).astype(int)
    mm_region_II_atoms = np.loadtxt(
        self.host_mm_region_II_atoms, ndmin=1
    ).astype(int)
    # Each region is defined by the atoms that are *removed* from the full
    # MM PDB, i.e. the members of the other region.
    jobs = (
        (mm_region_II_atoms, self.host_mm_region_I_pdb),
        (mm_region_I_atoms, self.host_mm_region_II_pdb),
    )
    for atoms_to_drop, output_pdb in jobs:
        ppdb = PandasPdb()
        ppdb.read_pdb(self.host_mm_pdb)
        atom_df = ppdb.df["ATOM"]
        # One vectorised mask instead of re-filtering the frame per atom.
        ppdb.df["ATOM"] = atom_df[
            ~atom_df["atom_number"].isin(atoms_to_drop)
        ]
        ppdb.to_pdb(
            path=output_pdb,
            records=None,
            gz=False,
            append_newline=True,
        )
def get_qm_mm_regions(self):
    """
    Writes the combined QM-region PDB and the MM-region PDB.

    ``self.qm_pdb`` receives every line containing "ATOM" from
    ``self.host_qm_pdb`` followed by every such line from
    ``self.guest_pdb``; ``self.mm_pdb`` receives every such line from
    ``self.host_mm_pdb``. Both outputs are terminated with "END" (no
    trailing newline).
    """

    def _copy_atom_lines(source, target, mode, terminate):
        # Copy the lines mentioning "ATOM" from source into target, which
        # is opened with the given mode; optionally append the END record.
        with open(source) as reader, open(target, mode) as writer:
            writer.writelines(row for row in reader if "ATOM" in row)
            if terminate:
                writer.write("END")

    # QM region: receptor QM atoms first, then the ligand appended.
    _copy_atom_lines(self.host_qm_pdb, self.qm_pdb, "w", False)
    _copy_atom_lines(self.guest_pdb, self.qm_pdb, "a", True)
    # MM region: the receptor atoms outside the QM selection.
    _copy_atom_lines(self.host_mm_pdb, self.mm_pdb, "w", True)
class PrepareGaussianGuest:
    """
    Runs the Gaussian QM calculation for the ligand and converts its
    checkpoint file.

    The Gaussian input file (.com) sharing its stem with ``guest_pdb`` is
    fed to ``g16``; the checkpoint file is then converted with
    ``formchk``. The resulting output files (.log, .chk and .fchk) are
    the ones later used to extract the ligand's force field parameters.

    ...

    Attributes
    ----------
    charge : int, optional
        Charge of the ligand.

    multiplicity: int, optional
        Spin Multiplicity (2S+1) of the ligand where S represents
        the total spin of the ligand.

    guest_pdb: str, optional
        Ligand PDB file with atom numbers beginning from 1.

    n_processors : int, optional
        Number of processors to be used for Gaussian program to run and
        set in %NProcShared command of Gaussian.

    memory : int, optional
        Memory (in GB) to be used set in %Mem command of Gaussian.

    functional: str, optional
        Exchange/Correlation or hybrid functional to use in the Gaussian
        QM calculation.

    basis_set: str, optional
        Basis set to use for the Gaussian QM calculation.

    optimisation: str, optional
        set to "OPT" to perform a geometry optimization on the ligand
        specified in the system; else set to an empty string.

    frequency: str, optional
        set to "FREQ" for Gaussian to perform a frequency calculation;
        else set to an empty string.

    add_keywords_I: str, optional
        Specifies the integration grid.

    add_keywords_II: str, optional
        Specifies the QM engine to select one of the methods for
        analyzing the electron density of the system. Methods used
        are based on fitting the molecular electrostatic potential.
        Methods used are : POP=CHELPG (Charges from Electrostatic
        Potentials using a Grid based method) and POP=MK
        (Merz-Singh-Kollman scheme)

    add_keywords_III: str, optional
        Used to include the IOp keyword (to set the internal options to
        specific values) in the Gaussian command.

    gauss_out_file: str, optional
        File that captures the output of the shell command that runs
        the Gaussian QM calculation.

    fchk_out_file: str, optional
        File that captures the output of the formchk command used to
        produce the formatted checkpoint file.

    """

    def __init__(
        self,
        charge=0,
        multiplicity=1,
        guest_pdb="guest_init_II.pdb",
        n_processors=12,
        memory=50,
        functional="B3LYP",
        basis_set="6-31G",
        optimisation="OPT",
        frequency="FREQ",
        add_keywords_I="INTEGRAL=(GRID=ULTRAFINE)",
        add_keywords_II="POP(MK,READRADII)",
        add_keywords_III="IOP(6/33=2,6/42=6)",
        gauss_out_file="guest.out",
        fchk_out_file="guest_fchk.out",
    ):
        # Molecular state of the ligand.
        self.charge = charge
        self.multiplicity = multiplicity
        self.guest_pdb = guest_pdb
        # Resources requested from Gaussian.
        self.n_processors = n_processors
        self.memory = memory
        # Level of theory and job type.
        self.functional = functional
        self.basis_set = basis_set
        self.optimisation = optimisation
        self.frequency = frequency
        # Extra route-section keywords.
        self.add_keywords_I = add_keywords_I
        self.add_keywords_II = add_keywords_II
        self.add_keywords_III = add_keywords_III
        # Files capturing the output of the shell commands.
        self.gauss_out_file = gauss_out_file
        self.fchk_out_file = fchk_out_file

    def run_gaussian(self):
        """
        Runs the Gaussian QM calculation for the ligand locally.

        ``g16`` reads ``<stem>.com`` and writes ``<stem>.log``, where
        ``<stem>`` is ``guest_pdb`` without its last four characters.
        Whatever the shell command itself emits goes to
        ``gauss_out_file``.
        """
        stem = self.guest_pdb[:-4]
        execute_command = f"g16 < {stem}.com > {stem}.log"
        with open(self.gauss_out_file, "w+") as f:
            sp.run(
                execute_command, shell=True, stdout=f, stderr=sp.STDOUT,
            )

    def get_fchk(self):
        """
        Converts the Gaussian checkpoint file (.chk) to a formatted
        checkpoint file (.fchk) with ``formchk``; the command's output is
        captured in ``fchk_out_file``.
        """
        stem = self.guest_pdb[:-4]
        execute_command = f"formchk {stem}.chk {stem}.fchk"
        with open(self.fchk_out_file, "w+") as f:
            sp.run(
                execute_command, shell=True, stdout=f, stderr=sp.STDOUT,
            )
class PrepareGaussianHostGuest:
    """
    A class used to run the Gaussian QM calculation for the receptor -
    ligand complex and to extract the resulting charges.

    The Gaussian input file (``host_guest_input``, .com extension) is fed
    to ``g16``, the checkpoint file is converted to a formatted checkpoint
    file with ``formchk``, and the ESP charges in the log file are split
    into ligand and receptor charge files.

    ...

    Attributes
    ----------
    charge : int, optional
        Total charge of the receptor - ligand complex.

    multiplicity : int, optional
        Spin Multiplicity (2S+1) of the ligand where S represents
        the total spin of the ligand.

    guest_pdb : str, optional
        Ligand PDB file with atom numbers beginning from 1.

    host_qm_pdb : str, optional
        PDB file for the receptor's QM region.

    n_processors : int, optional
        Number of processors to be used for Gaussian program to run and
        set in %NProcShared command of Gaussian.

    memory : int, optional
        Memory (in GB) to be used set in %Mem command of Gaussian.

    functional: str, optional
        Exchange/Correlation or hybrid functional to use in the Gaussian
        QM calculation.

    basis_set: str, optional
        Basis set to use for the Gaussian QM calculation.

    optimisation: str, optional
        set to "OPT" to perform a geometry optimization on the ligand
        specified in the system; else set to an empty string.

    frequency: str, optional
        set to "FREQ" for Gaussian to perform a frequency calculation;
        else set to an empty string.

    add_keywords_I: str, optional
        Specifies the integration grid.

    add_keywords_II: str, optional
        Specifies the QM engine to select one of the methods for
        analyzing the electron density of the system. Methods used
        are based on fitting the molecular electrostatic potential.
        Methods used are : POP=CHELPG (Charges from Electrostatic
        Potentials using a Grid based method) and POP=MK
        (Merz-Singh-Kollman scheme)

    add_keywords_III: str, optional
        Used to include the IOp keyword (to set the internal options to
        specific values) in the Gaussian command.

    gauss_system_out_file : str, optional
        File that captures the output of the shell command that runs
        the Gaussian QM calculation.

    fchk_system_out_file : str, optional
        File that captures the output of the formchk command used to
        produce the formatted checkpoint file.

    host_guest_input : str, optional
        Gaussian input file (.com extension) for the receptor - ligand
        QM region.

    qm_guest_charge_parameter_file : str, optional
        File containing the charges of ligand atoms and their
        corresponding atoms. Charges obtained are the polarised charges
        due to the surrounding receptor's region.

    qm_host_charge_parameter_file : str, optional
        File containing the charges of the QM region of the receptor.

    qm_guest_atom_charge_parameter_file : str, optional
        File containing the charges of ligand atoms. Charges obtained
        are the polarised charges due to the surrounding receptor's
        region.

    """

    def __init__(
        self,
        charge=0,
        multiplicity=1,
        guest_pdb="guest_init_II.pdb",
        host_qm_pdb="host_qm.pdb",
        n_processors=12,
        memory=50,
        functional="B3LYP",
        basis_set="6-31G",
        optimisation="",
        frequency="",
        add_keywords_I="INTEGRAL=(GRID=ULTRAFINE)",
        add_keywords_II="POP(MK,READRADII)",
        add_keywords_III="IOP(6/33=2,6/42=6) SCRF=PCM",
        gauss_system_out_file="system_qm.out",
        fchk_system_out_file="system_qm_fchk.out",
        host_guest_input="host_guest.com",
        qm_guest_charge_parameter_file="guest_qm_surround_charges.txt",
        qm_host_charge_parameter_file="host_qm_surround_charges.txt",
        qm_guest_atom_charge_parameter_file="guest_qm_atom_surround_charges.txt",
    ):
        self.charge = charge
        self.multiplicity = multiplicity
        self.guest_pdb = guest_pdb
        self.host_qm_pdb = host_qm_pdb
        self.n_processors = n_processors
        self.memory = memory
        self.functional = functional
        self.basis_set = basis_set
        self.optimisation = optimisation
        self.frequency = frequency
        self.add_keywords_I = add_keywords_I
        self.add_keywords_II = add_keywords_II
        self.add_keywords_III = add_keywords_III
        self.gauss_system_out_file = gauss_system_out_file
        self.fchk_system_out_file = fchk_system_out_file
        self.host_guest_input = host_guest_input
        self.qm_guest_charge_parameter_file = qm_guest_charge_parameter_file
        self.qm_host_charge_parameter_file = qm_host_charge_parameter_file
        self.qm_guest_atom_charge_parameter_file = (
            qm_guest_atom_charge_parameter_file
        )

    def run_gaussian(self):
        """
        Runs the Gaussian QM calculation for the ligand - receptor region
        locally.

        ``g16`` reads ``host_guest_input`` and writes a ``.log`` file
        named after the input without its last four characters. Whatever
        the shell command itself emits goes to ``gauss_system_out_file``.
        """
        stem = self.host_guest_input[:-4]
        execute_command = (
            "g16" + " < " + self.host_guest_input + " > " + stem + ".log"
        )
        with open(self.gauss_system_out_file, "w+") as f:
            sp.run(
                execute_command, shell=True, stdout=f, stderr=sp.STDOUT,
            )

    def get_fchk(self):
        """
        Converts the Gaussian checkpoint file (.chk) to a formatted
        checkpoint file (.fchk) with ``formchk``; the command's output is
        captured in ``fchk_system_out_file``.
        """
        stem = self.host_guest_input[:-4]
        execute_command = "formchk" + " " + stem + ".chk" + " " + stem + ".fchk"
        with open(self.fchk_system_out_file, "w+") as f:
            sp.run(
                execute_command, shell=True, stdout=f, stderr=sp.STDOUT,
            )

    def get_qm_host_guest_charges(self):
        """
        Extract charge information for the receptor - ligand QM region.

        The charge table is taken from the Gaussian log file, between the
        last "Fitting point charges to electrostatic potential" line and
        the last " Sum of ESP charges =" line. The first
        ``number of ATOM records in guest_pdb`` rows are treated as the
        ligand and the remaining rows as the receptor.

        Raises
        ------
        ValueError
            If either marker line is missing from the log file.
        """
        log_file = self.host_guest_input[:-4] + ".log"
        with open(log_file, "r") as f:
            lines = f.readlines()
        to_begin = None
        to_end = None
        # The last occurrence of each marker wins.
        for index, line in enumerate(lines):
            if "Fitting point charges to electrostatic potential" in line:
                to_begin = index
            if " Sum of ESP charges =" in line:
                to_end = index
        if to_begin is None or to_end is None:
            raise ValueError(
                "No ESP charge section found in " + log_file
            )
        # The table starts four lines below the marker line; the skipped
        # lines are presumably the fit summary and table header - confirm
        # against the Gaussian output in use.
        rows = [row.strip().split() for row in lines[to_begin + 4 : to_end]]
        # Rows need an index, an atom label and a charge; anything shorter
        # (e.g. a blank line) is skipped instead of raising IndexError.
        rows = [fields for fields in rows if len(fields) >= 3]
        atom_list = [fields[1] for fields in rows]
        charge_list_value = [fields[2] for fields in rows]
        ppdb = PandasPdb()
        ppdb.read_pdb(self.guest_pdb)
        number_guest_atoms = ppdb.df["ATOM"].shape[0]
        data_tuples = list(zip(atom_list, charge_list_value))
        df_charge = pd.DataFrame(data_tuples, columns=["Atom", "Charge"])
        number_host_atoms = df_charge.shape[0] - number_guest_atoms
        df_charge_guest = df_charge.head(number_guest_atoms)
        df_charge_host = df_charge.tail(number_host_atoms)
        df_charge_only_guest = df_charge_guest["Charge"]
        df_charge_guest.to_csv(
            self.qm_guest_charge_parameter_file,
            index=False,
            header=False,
            sep=" ",
        )
        df_charge_host.to_csv(
            self.qm_host_charge_parameter_file,
            index=False,
            header=False,
            sep=" ",
        )
        df_charge_only_guest.to_csv(
            self.qm_guest_atom_charge_parameter_file,
            index=False,
            header=False,
            sep=" ",
        )
[docs]class ParameterizeGuest:
"""
A class used to obtain force field parameters for the ligand (bond,
angle and charge parameters) from QM calculations.
This class contain methods to process the output files of the
Gaussian QM output files (.chk, .fchk and .log files). Methods
in the class extract the unprocessed hessian matrix from the
Gaussian QM calculations, processes it and uses the Modified
Seminario Method to obtain the bond and angle parameters. The
class also extracts the QM charges from the log file.
...
Attributes
----------
xyz_file: str, optional
XYZ file for ligand coordinates obtained from its corresponding
formatted checkpoint file.
coordinate_file: str, optional
Text file containing the ligand coordinates (extracted
from the formatted checkpoint file).
unprocessed_hessian_file: str, optional
Unprocessed hessian matrix of the ligand obtained from the
formatted checkpoint file.
bond_list_file: str, optional
Text file containing the bond information of the ligand extracted
from the log file.
angle_list_file: str, optional
Text file containing the angle information of the ligand extracted
from the log file.
hessian_file: str, optional
Processed hessian matrix of the ligand.
atom_names_file: str, optional
Text file containing the list of atom names from the fchk file.
bond_parameter_file: str, optional
Text file containing the bond parameters for the ligand obtained
using the Modified Seminario method.
angle_parameter_file: str, optional
Text file containing the angle parameters of the ligand obtained
using the Modified Seminario method.
charge_parameter_file: str, optional
Text file containing the QM charges of the ligand.
guest_pdb: str, optional
Ligand PDB file with atom numbers beginning from 1.
proper_dihedral_file: str, optional
A text file containing proper dihedral angles of the ligand.
functional: str, optional
Exchange/Correlation or hybrid functional to use in the Gaussian
QM calculation.
basis_set: str, optional
Basis set to use for the Gaussian QM calculation.
"""
[docs] def __init__(
self,
xyz_file="guest_coords.xyz",
coordinate_file="guest_coordinates.txt",
unprocessed_hessian_file="guest_unprocessed_hessian.txt",
bond_list_file="guest_bond_list.txt",
angle_list_file="guest_angle_list.txt",
hessian_file="guest_hessian.txt",
atom_names_file="guest_atom_names.txt",
bond_parameter_file="guest_bonds.txt",
angle_parameter_file="guest_angles.txt",
charge_parameter_file="guest_qm_surround_charges.txt",
guest_pdb="guest_init_II.pdb",
proper_dihedral_file="proper_dihedrals.txt",
functional="B3LYP",
basis_set="6-31G",
):
self.xyz_file = xyz_file
self.coordinate_file = coordinate_file
self.unprocessed_hessian_file = unprocessed_hessian_file
self.bond_list_file = bond_list_file
self.angle_list_file = angle_list_file
self.hessian_file = hessian_file
self.atom_names_file = atom_names_file
self.bond_parameter_file = bond_parameter_file
self.angle_parameter_file = angle_parameter_file
self.charge_parameter_file = charge_parameter_file
self.guest_pdb = guest_pdb
self.proper_dihedral_file = proper_dihedral_file
self.functional = functional
self.basis_set = basis_set
def get_xyz(self):
    """
    Saves XYZ file from the formatted checkpoint file.

    The ``.fchk`` file sharing its stem with ``self.guest_pdb`` is parsed
    for the "Current cartesian coordinates" block and for the atomic
    numbers listed between the "Atomic numbers" and "Nuclear charges"
    lines. Coordinates are multiplied by ``BOHRS_PER_ANGSTROM`` (Bohr to
    Angstrom conversion), written rounded to three decimals to
    ``self.xyz_file`` and at full precision to ``self.coordinate_file``.

    Raises
    ------
    ValueError
        If a required section is missing from the ``.fchk`` file or it
        lists fewer atomic numbers than atoms.
    """
    fchk_file = self.guest_pdb[:-4] + ".fchk"
    with open(fchk_file, "r") as f:
        lines = f.readlines()
    # The last occurrence of each section header wins.
    coords_header = numbers_header = charges_header = None
    for index, line in enumerate(lines):
        if "Current cartesian coordinates" in line:
            coords_header = index
        if "Atomic numbers" in line:
            numbers_header = index
        if "Nuclear charges" in line:
            charges_header = index
    if None in (coords_header, numbers_header, charges_header):
        raise ValueError(
            "Coordinate or atomic number section not found in " + fchk_file
        )
    # The header line carries the number of coordinate values (3 per atom).
    count_match = re.search(r"\d+", lines[coords_header])
    if count_match is None:
        raise ValueError(
            "No coordinate count on the coordinates header of " + fchk_file
        )
    no_coordinates = int(count_match.group())
    # Values are stored five per line.
    first_row = coords_header + 1
    n_rows = int(math.ceil(no_coordinates / 5))
    coordinate_tokens = [
        token
        for row in lines[first_row : first_row + n_rows]
        for token in row.strip().split()
    ]
    N = int(no_coordinates / 3)
    # Converted from Atomic units (Bohrs) to Angstroms
    coords = np.array(
        [float(x) * BOHRS_PER_ANGSTROM for x in coordinate_tokens[: 3 * N]]
    ).reshape(N, 3)
    numbers = [
        token
        for row in lines[numbers_header + 1 : charges_header]
        for token in row.strip().split()
    ]
    # Gives name for atomic number
    names = [element_list[int(number) - 1][1] for number in numbers]
    if len(names) < N:
        raise ValueError(
            "Fewer atomic numbers than atoms in " + fchk_file
        )
    with open(self.xyz_file, "w") as file:
        file.write(str(N) + "\n \n")
        for i in range(N):
            # The symbol is stripped and followed by an explicit space so
            # that it can never run into the x coordinate. NOTE(review):
            # element_list is defined elsewhere; get_atom_names strips its
            # symbols, which suggests they may carry padding - confirm.
            file.write(
                names[i].strip()
                + " "
                + str(round(coords[i][0], 3))
                + " "
                + str(round(coords[i][1], 3))
                + " "
                + str(round(coords[i][2], 3))
                + "\n"
            )
    np.savetxt(self.coordinate_file, coords, fmt="%s")
[docs] def get_unprocessed_hessian(self):
"""
Saves a text file of the unprocessed hessian matrix from the
formatted checkpoint file.
"""
fchk_file = self.guest_pdb[:-4] + ".fchk"
with open(fchk_file, "r") as f:
lines = f.readlines()
for i in range(len(lines)):
if "Cartesian Force Constants" in lines[i]:
no_hessian = re.findall(r"\d+|\d+.\d+", lines[i])
no_hessian = int(no_hessian[0])
to_begin = int(i)
hessian = lines[
to_begin + 1 : to_begin + 1 + int(math.ceil(no_hessian / 5))
]
hessian_list = []
for i in range(len(hessian)):
hessian_list.append(hessian[i].strip().split())
unprocessed_Hessian = [
item for sublist in hessian_list for item in sublist
]
np.savetxt(
self.unprocessed_hessian_file, unprocessed_Hessian, fmt="%s",
)
[docs] def get_bond_angles(self):
"""
Saves a text file containing bonds and angles from the gaussian
log file.
"""
log_file = self.guest_pdb[:-4] + ".log"
with open(log_file, "r") as fid:
tline = fid.readline()
bond_list = []
angle_list = []
tmp = "R" # States if bond or angle
# Finds the bond and angles from the .log file
while tline:
tline = fid.readline()
# Line starts at point when bond and angle list occurs
if (
len(tline) > 80
and tline[0:81].strip()
== "! Name Definition Value Derivative Info. !"
):
tline = fid.readline()
tline = fid.readline()
# Stops when all bond and angles recorded
while (tmp[0] == "R") or (tmp[0] == "A"):
line = tline.split()
tmp = line[1]
# Bond or angles listed as string
list_terms = line[2][2:-1]
# Bond List
if tmp[0] == "R":
x = list_terms.split(",")
# Subtraction due to python array indexing at 0
x = [(int(i) - 1) for i in x]
bond_list.append(x)
# Angle List
if tmp[0] == "A":
x = list_terms.split(",")
# Subtraction due to python array indexing at 0
x = [(int(i) - 1) for i in x]
angle_list.append(x)
tline = fid.readline()
# Leave loop
tline = -1
np.savetxt(self.bond_list_file, bond_list, fmt="%s")
np.savetxt(self.angle_list_file, angle_list, fmt="%s")
def get_hessian(self):
    """
    Extracts hessian matrix from the unprocessed hessian matrix
    and saves into a new file.

    ``self.unprocessed_hessian_file`` holds the lower triangle of the
    matrix row by row. It is expanded into the full symmetric
    ``3N x 3N`` matrix (``N`` taken from the coordinate count in the
    ``.fchk`` file), multiplied by ``HARTREE_PER_KCAL_MOL`` and divided
    by ``BOHRS_PER_ANGSTROM ** 2``, and written to ``self.hessian_file``.

    Raises
    ------
    ValueError
        If the ``.fchk`` file has no usable "Current cartesian
        coordinates" line or the unprocessed hessian holds too few
        values.
    """
    unprocessed_Hessian = np.loadtxt(self.unprocessed_hessian_file, ndmin=1)
    fchk_file = self.guest_pdb[:-4] + ".fchk"
    with open(fchk_file, "r") as f:
        lines = f.readlines()
    no_coordinates = None
    # The last matching header line wins.
    for line in lines:
        if "Current cartesian coordinates" in line:
            count_match = re.search(r"\d+", line)
            if count_match is not None:
                no_coordinates = int(count_match.group())
    if no_coordinates is None:
        raise ValueError(
            "No coordinate count found in " + fchk_file
        )
    N = int(no_coordinates / 3)
    length_hessian = 3 * N
    # tril_indices walks the lower triangle row by row, the same order in
    # which the values are stored.
    rows, cols = np.tril_indices(length_hessian)
    if unprocessed_Hessian.size < rows.size:
        raise ValueError(
            "Unprocessed hessian holds fewer values than a "
            + str(length_hessian)
            + " x "
            + str(length_hessian)
            + " lower triangle needs."
        )
    lower_values = unprocessed_Hessian[: rows.size]
    # Write the hessian in a 2D array format
    hessian = np.zeros((length_hessian, length_hessian))
    hessian[rows, cols] = lower_values
    hessian[cols, rows] = lower_values
    # Change from Hartree/bohr^2 to kcal/mol/ang^2
    hessian = (hessian * HARTREE_PER_KCAL_MOL) / (BOHRS_PER_ANGSTROM ** 2)
    np.savetxt(self.hessian_file, hessian, fmt="%s")
def get_atom_names(self):
    """
    Builds atom names from the formatted checkpoint file and writes them,
    one per line, to ``self.atom_names_file``.

    Each name is the element symbol looked up in ``element_list`` for the
    atomic number, followed by the 1-based position of the atom in the
    "Atomic numbers" block.
    """
    fchk_file = self.guest_pdb[:-4] + ".fchk"
    with open(fchk_file, "r") as handle:
        fchk_lines = handle.readlines()
    # The atomic numbers sit between these two section headers; the last
    # occurrence of each header is the one used.
    for position, text in enumerate(fchk_lines):
        if "Atomic numbers" in text:
            section_start = position
        if "Nuclear charges" in text:
            section_end = position
    numbers = [
        token
        for row in fchk_lines[section_start + 1 : section_end]
        for token in row.strip().split()
    ]
    # Gives name for atomic number
    names = [element_list[int(number) - 1][1] for number in numbers]
    atom_names = [
        symbol.strip() + str(rank)
        for rank, symbol in enumerate(names, start=1)
    ]
    np.savetxt(self.atom_names_file, atom_names, fmt="%s")
def get_bond_angle_params(self):
    """
    Saves the bond and angle parameter files obtained from
    the formatted checkpoint file.

    The atom count is read from the ``.fchk`` file sharing its stem with
    ``self.guest_pdb``. Coordinates, the processed hessian, the bond and
    angle lists and the atom names are loaded from
    ``self.coordinate_file``, ``self.hessian_file``,
    ``self.bond_list_file``, ``self.angle_list_file`` and
    ``self.atom_names_file``. Force constants come from
    ``force_constant_bond`` and ``force_angle_constant`` (both atom
    orderings are averaged) and are multiplied by the square of the
    factor returned by ``get_vibrational_scaling``.

    Each line of ``self.bond_parameter_file`` holds
    ``nameA-nameB k length indexA indexB`` and each line of
    ``self.angle_parameter_file`` holds
    ``nameA-nameB-nameC k theta indexA indexB indexC`` (1-based indices).

    Raises
    ------
    ValueError
        If the ``.fchk`` file has no "Current cartesian coordinates"
        line.
    """
    fchk_file = self.guest_pdb[:-4] + ".fchk"
    with open(fchk_file, "r") as f:
        lines = f.readlines()
    no_coordinates = None
    # The last matching header line wins.
    for line in lines:
        if "Current cartesian coordinates" in line:
            no_coordinates = int(re.findall(r"\d+|\d+.\d+", line)[0])
    if no_coordinates is None:
        raise ValueError(
            '"Current cartesian coordinates" not found in ' + fchk_file
        )
    N = int(no_coordinates / 3)
    # reshape keeps single-row (and empty) files two-dimensional, which a
    # bare loadtxt would flatten.
    coords = np.loadtxt(self.coordinate_file).reshape(-1, 3)
    hessian = np.loadtxt(self.hessian_file)
    bond_list = np.loadtxt(self.bond_list_file, dtype=int).reshape(-1, 2)
    atom_names = np.loadtxt(self.atom_names_file, dtype=str, ndmin=1)
    # Find bond lengths
    bond_lengths = np.zeros((N, N))
    for i in range(0, N):
        for j in range(0, N):
            diff_i_j = np.array(coords[i, :]) - np.array(coords[j, :])
            bond_lengths[i][j] = np.linalg.norm(diff_i_j)
    # Eigen-decomposition of every 3x3 atom-pair block of the hessian.
    eigenvectors = np.empty((3, 3, N, N), dtype=complex)
    eigenvalues = np.empty((N, N, 3), dtype=complex)
    for i in range(0, N):
        for j in range(0, N):
            partial_hessian = hessian[
                (i * 3) : ((i + 1) * 3), (j * 3) : ((j + 1) * 3)
            ]
            [a, b] = np.linalg.eig(partial_hessian)
            eigenvalues[i, j, :] = a
            eigenvectors[:, :, i, j] = b
    # Vibrational_scaling takes into account DFT deficities /
    # anharmocity. It does not depend on the bond or angle, so it is
    # looked up once and is available to the angle loop even when the
    # bond list is empty.
    vibrational_scaling = get_vibrational_scaling(
        functional=self.functional, basis_set=self.basis_set
    )
    vibrational_scaling_squared = vibrational_scaling ** 2
    # Modified Seminario method to find the bond parameters and
    # print them to file
    k_b = np.zeros(len(bond_list))
    bond_length_list = np.zeros(len(bond_list))
    with open(self.bond_parameter_file, "w") as file_bond:
        for i in range(0, len(bond_list)):
            atom_a = bond_list[i][0]
            atom_b = bond_list[i][1]
            AB = force_constant_bond(
                atom_a, atom_b, eigenvalues, eigenvectors, coords,
            )
            BA = force_constant_bond(
                atom_b, atom_a, eigenvalues, eigenvectors, coords,
            )
            # Order of bonds sometimes causes slight differences,
            # find the mean
            k_b[i] = np.real((AB + BA) / 2)
            k_b[i] = k_b[i] * vibrational_scaling_squared
            bond_length_list[i] = bond_lengths[atom_a][atom_b]
            file_bond.write(
                atom_names[atom_a] + "-" + atom_names[atom_b] + " "
            )
            file_bond.write(
                str("%#.5g" % k_b[i])
                + " "
                + str("%#.4g" % bond_length_list[i])
                + " "
                + str(atom_a + 1)
                + " "
                + str(atom_b + 1)
            )
            file_bond.write("\n")
    angle_list = np.loadtxt(self.angle_list_file, dtype=int).reshape(-1, 3)
    # Modified Seminario method to find the angle parameters
    # and print them to file
    k_theta = np.zeros(len(angle_list))
    theta_0 = np.zeros(len(angle_list))
    # Connectivity information for Modified Seminario Method
    central_atoms_angles = []
    # A structure is created with the index giving the central
    # atom of the angle,
    # an array then lists the angles with that central atom.
    # i.e. central_atoms_angles{3} contains an array of angles
    # with central atom 3
    for i in range(0, len(coords)):
        central_atoms_angles.append([])
        for j in range(0, len(angle_list)):
            if i == angle_list[j][1]:
                # For angle ABC, atoms A C are written to array
                AC_array = [angle_list[j][0], angle_list[j][2], j]
                central_atoms_angles[i].append(AC_array)
                # For angle ABC, atoms C A are written to array
                CA_array = [angle_list[j][2], angle_list[j][0], j]
                central_atoms_angles[i].append(CA_array)
    # Sort rows by atom number
    for i in range(0, len(coords)):
        central_atoms_angles[i] = sorted(
            central_atoms_angles[i], key=itemgetter(0)
        )
    # Find normals u_PA for each angle
    unit_PA_all_angles = []
    for i in range(0, len(central_atoms_angles)):
        unit_PA_all_angles.append([])
        for j in range(0, len(central_atoms_angles[i])):
            # For the angle at central_atoms_angles[i][j,:] the
            # corresponding u_PA value
            # is found for the plane ABC and bond AB, where ABC
            # corresponds to the order
            # of the arguments. This is why the reverse order
            # was also added
            unit_PA_all_angles[i].append(
                u_PA_from_angles(
                    central_atoms_angles[i][j][0],
                    i,
                    central_atoms_angles[i][j][1],
                    coords,
                )
            )
    # Finds the contributing factors from the other angle terms.
    # Each entry is [scaling factor, position in angle list].
    scaling_factor_all_angles = []
    for i in range(0, len(central_atoms_angles)):
        scaling_factor_all_angles.append([])
        for j in range(0, len(central_atoms_angles[i])):
            n = 1
            m = 1
            additional_contributions = 0
            scaling_factor_all_angles[i].append([0, 0])
            # Position in angle list
            scaling_factor_all_angles[i][j][1] = central_atoms_angles[i][
                j
            ][2]
            # Goes through the list of angles with the same central atom
            # and computes the
            # term need for the modified Seminario method
            # Forwards directions, finds the same bonds with the
            # central atom i
            while (
                ((j + n) < len(central_atoms_angles[i]))
                and central_atoms_angles[i][j][0]
                == central_atoms_angles[i][j + n][0]
            ):
                additional_contributions = (
                    additional_contributions
                    + (
                        abs(
                            np.dot(
                                unit_PA_all_angles[i][j][:],
                                unit_PA_all_angles[i][j + n][:],
                            )
                        )
                    )
                    ** 2
                )
                n = n + 1
            # Backwards direction, finds the same bonds with the
            # central atom i
            while ((j - m) >= 0) and central_atoms_angles[i][j][
                0
            ] == central_atoms_angles[i][j - m][0]:
                additional_contributions = (
                    additional_contributions
                    + (
                        abs(
                            np.dot(
                                unit_PA_all_angles[i][j][:],
                                unit_PA_all_angles[i][j - m][:],
                            )
                        )
                    )
                    ** 2
                )
                m = m + 1
            if n != 1 or m != 1:
                # Finds the mean value of the additional contribution to
                # change to normal
                # Seminario method comment out + part
                scaling_factor_all_angles[i][j][0] = 1 + (
                    additional_contributions / (m + n - 2)
                )
            else:
                scaling_factor_all_angles[i][j][0] = 1
    scaling_factors_angles_list = []
    for i in range(0, len(angle_list)):
        scaling_factors_angles_list.append([])
    # Orders the scaling factors according to the angle list
    for i in range(0, len(central_atoms_angles)):
        for j in range(0, len(central_atoms_angles[i])):
            scaling_factors_angles_list[
                scaling_factor_all_angles[i][j][1]
            ].append(scaling_factor_all_angles[i][j][0])
    # Finds the angle force constants with the scaling factors
    # included for each angle
    with open(self.angle_parameter_file, "w") as file_angle:
        for i in range(0, len(angle_list)):
            atom_a = angle_list[i][0]
            atom_b = angle_list[i][1]
            atom_c = angle_list[i][2]
            # Ensures that there is no difference when the
            # ordering is changed
            [AB_k_theta, AB_theta_0] = force_angle_constant(
                atom_a,
                atom_b,
                atom_c,
                bond_lengths,
                eigenvalues,
                eigenvectors,
                coords,
                scaling_factors_angles_list[i][0],
                scaling_factors_angles_list[i][1],
            )
            [BA_k_theta, BA_theta_0] = force_angle_constant(
                atom_c,
                atom_b,
                atom_a,
                bond_lengths,
                eigenvalues,
                eigenvectors,
                coords,
                scaling_factors_angles_list[i][1],
                scaling_factors_angles_list[i][0],
            )
            k_theta[i] = (AB_k_theta + BA_k_theta) / 2
            theta_0[i] = (AB_theta_0 + BA_theta_0) / 2
            # Vibrational_scaling takes into account DFT
            # deficities/ anharmonicity
            k_theta[i] = k_theta[i] * vibrational_scaling_squared
            file_angle.write(
                atom_names[atom_a]
                + "-"
                + atom_names[atom_b]
                + "-"
                + atom_names[atom_c]
                + " "
            )
            file_angle.write(
                str("%#.4g" % k_theta[i])
                + " "
                + str("%#.4g" % theta_0[i])
                + " "
                + str(atom_a + 1)
                + " "
                + str(atom_b + 1)
                + " "
                + str(atom_c + 1)
            )
            file_angle.write("\n")
def get_charges(self):
    """
    Saves the atomic charges in a text file obtained from
    the Gaussian log file.

    The log file name is built from ``self.guest_pdb`` by replacing its
    last four characters with ".log". The rows that lie between the last
    "Fitting point charges to electrostatic potential" line (skipping the
    four lines that start at that banner) and the last
    " Sum of ESP charges =" line are split on whitespace. The second
    field of each row is stored as the atom and the third field as the
    charge; the pairs are written, space separated and without header or
    index, to ``self.charge_parameter_file``.

    Raises
    ------
    ValueError
        If either marker line is absent from the log file (previously this
        surfaced as an UnboundLocalError).
    """
    log_file = self.guest_pdb[:-4] + ".log"
    with open(log_file, "r") as f:
        lines = f.readlines()
    to_begin = None
    to_end = None
    # The last occurrence of each marker wins, as in a log that holds
    # several ESP fits.
    for index, line in enumerate(lines):
        if "Fitting point charges to electrostatic potential" in line:
            to_begin = index
        if " Sum of ESP charges =" in line:
            to_end = index
    if to_begin is None or to_end is None:
        raise ValueError(
            "No ESP charge section found in Gaussian log file " + log_file
        )
    charge_list = [row.strip().split() for row in lines[to_begin + 4 : to_end]]
    atom_list = [fields[1] for fields in charge_list]
    charge_list_value = [fields[2] for fields in charge_list]
    data_tuples = list(zip(atom_list, charge_list_value))
    df_charge = pd.DataFrame(data_tuples, columns=["Atom", "Charge"])
    df_charge.to_csv(
        self.charge_parameter_file, index=False, header=False, sep=" ",
    )
def get_proper_dihedrals(self):
    """
    Saves proper dihedral angles of the ligand in a text file.

    Atoms are numbered 1..N, where N is the number of ATOM records that
    biopandas reads from ``self.guest_pdb``. Bonds are taken from the
    fourth and fifth whitespace-separated columns of
    ``self.bond_parameter_file`` and are treated as undirected. Every chain
    of four distinct atoms a-b-c-d whose three consecutive pairs are
    bonded is a proper dihedral; of the two directions of a chain only the
    lexicographically smaller tuple is kept. The result is written in
    sorted order to ``self.proper_dihedral_file``, one dihedral per line.

    The previous implementation tested every 4-permutation of the atoms
    (O(N^4)); walking the bond graph produces the same set in the same
    order without that cost.
    """
    ppdb = PandasPdb()
    ppdb.read_pdb(self.guest_pdb)
    no_atoms = len(ppdb.df["ATOM"])
    valid_atoms = set(range(1, no_atoms + 1))
    df_bonds = pd.read_csv(
        self.bond_parameter_file, header=None, delimiter=r"\s+"
    )
    df_bonds.columns = [
        "bond",
        "k_bond",
        "bond_length",
        "bond_1",
        "bond_2",
    ]
    bond1 = df_bonds["bond_1"].values.tolist()
    bond2 = df_bonds["bond_2"].values.tolist()
    # Undirected adjacency restricted to atoms present in the PDB.
    neighbours = {}
    for first, second in zip(bond1, bond2):
        if first == second:
            continue
        if first not in valid_atoms or second not in valid_atoms:
            continue
        first, second = int(first), int(second)
        neighbours.setdefault(first, set()).add(second)
        neighbours.setdefault(second, set()).add(first)
    unique_dihedrals = set()
    for atom_b, around_b in neighbours.items():
        for atom_c in around_b:
            for atom_a in around_b:
                if atom_a == atom_c:
                    continue
                for atom_d in neighbours[atom_c]:
                    # All four atoms must be distinct (excludes 3-rings
                    # closing back on atom_a).
                    if atom_d == atom_b or atom_d == atom_a:
                        continue
                    dihed = (atom_a, atom_b, atom_c, atom_d)
                    # Keep one direction per chain: the smaller tuple.
                    if dihed < dihed[::-1]:
                        unique_dihedrals.add(dihed)
    proper_dihedrals = sorted(unique_dihedrals)
    np.savetxt(self.proper_dihedral_file, proper_dihedrals, fmt="%s")
class PrepareGaussianHost:
    """
    Helper for running Gaussian QM calculations on the receptor.

    The class stores the settings of a Gaussian job for the receptor's
    QM region, launches the ``g16`` executable on the corresponding
    ``.com`` input file and converts the resulting checkpoint file into a
    formatted checkpoint file. The output files (.log, .chk and .fchk)
    are later used to extract the receptor's force field parameters.

    ...

    Attributes
    ----------
    charge : int, optional
        Charge of the receptor.

    multiplicity: int, optional
        Spin multiplicity (2S+1) of the receptor, where S is the total
        spin of the receptor.

    host_qm_pdb: str, optional
        PDB file of the receptor's QM region with atom numbers
        beginning from 1. Its name without the last four characters is
        used as the stem of the .com, .log, .chk and .fchk files.

    n_processors : int, optional
        Number of processors for the Gaussian run (%NProcShared).

    memory : int, optional
        Memory (in GB) for the Gaussian run (%Mem).

    functional: str, optional
        Exchange/correlation or hybrid functional for the Gaussian
        QM calculation.

    basis_set: str, optional
        Basis set for the Gaussian QM calculation.

    optimisation: str, optional
        "OPT" to request a geometry optimization of the receptor,
        otherwise an empty string.

    frequency: str, optional
        "FREQ" to request a frequency calculation, otherwise an empty
        string.

    add_keywords_I: str, optional
        Specifies the integration grid.

    add_keywords_II: str, optional
        Selects the population analysis used to fit charges to the
        molecular electrostatic potential: POP=CHELPG (Charges from
        Electrostatic Potentials using a Grid based method) or POP=MK
        (Merz-Singh-Kollman scheme).

    add_keywords_III: str, optional
        IOp keyword (sets Gaussian internal options to specific values).

    gauss_out_file: str, optional
        File that captures the output of the Gaussian command.

    fchk_out_file: str, optional
        File that captures the output of the formchk command.

    """

    def __init__(
        self,
        charge=0,
        multiplicity=1,
        host_qm_pdb="host_qm.pdb",
        n_processors=12,
        memory=50,
        functional="B3LYP",
        basis_set="6-31G",
        optimisation="OPT",
        frequency="FREQ",
        add_keywords_I="INTEGRAL=(GRID=ULTRAFINE) SCF=(maxcycles=4000) SYMMETRY=NONE",
        add_keywords_II="POP(MK,READRADII)",
        add_keywords_III="IOP(6/33=2,6/42=6)",
        gauss_out_file="host_qm.out",
        fchk_out_file="host_qm_fchk.out",
    ):
        # System description
        self.host_qm_pdb = host_qm_pdb
        self.charge = charge
        self.multiplicity = multiplicity
        # Hardware resources
        self.n_processors = n_processors
        self.memory = memory
        # Level of theory and job type
        self.functional = functional
        self.basis_set = basis_set
        self.optimisation = optimisation
        self.frequency = frequency
        # Extra route-section keywords
        self.add_keywords_I = add_keywords_I
        self.add_keywords_II = add_keywords_II
        self.add_keywords_III = add_keywords_III
        # Files that capture the console output of the external programs
        self.gauss_out_file = gauss_out_file
        self.fchk_out_file = fchk_out_file

    def run_gaussian(self):
        """
        Runs the Gaussian QM calculation for the receptor locally.

        ``g16`` reads ``<stem>.com`` and its standard output is redirected
        to ``<stem>.log`` by the shell; whatever the command still emits
        (stderr included) is captured in ``self.gauss_out_file``.
        """
        stem = self.host_qm_pdb[:-4]
        execute_command = "g16 < {0}.com > {0}.log".format(stem)
        with open(self.gauss_out_file, "w+") as captured:
            sp.run(
                execute_command,
                shell=True,
                stdout=captured,
                stderr=sp.STDOUT,
            )

    def get_fchk(self):
        """
        Converts the Gaussian checkpoint file (.chk) to a formatted
        checkpoint file (.fchk).

        The console output of ``formchk`` (stderr included) is captured in
        ``self.fchk_out_file``.
        """
        stem = self.host_qm_pdb[:-4]
        execute_command = "formchk {0}.chk {0}.fchk".format(stem)
        with open(self.fchk_out_file, "w+") as captured:
            sp.run(
                execute_command,
                shell=True,
                stdout=captured,
                stderr=sp.STDOUT,
            )
[docs]class ParameterizeHost:
"""
A class used to obtain force field parameters for the QM region
of the receptor (bond, angle and charge parameters) from QM
calculations.
This class contain methods to process the output files of the
Gaussian QM output files (.chk, .fchk and .log files). Methods
in the class extract the unprocessed hessian matrix from the
Gaussian QM calculations, processes it and uses the Modified
Seminario Method to obtain the bond and angle parameters. The
class also extracts the QM charges from the log file.
...
Attributes
----------
xyz_file: str, optional
XYZ file for receptor coordinates obtained from its corresponding
formatted checkpoint file.
coordinate_file: str, optional
Text file containing the receptor coordinates (extracted
from the formatted checkpoint file).
unprocessed_hessian_file: str, optional
Unprocessed hessian matrix of the receptor obtained from the
formatted checkpoint file.
bond_list_file: str, optional
Text file containing the bond information of the receptor
extracted from the log file.
angle_list_file: str, optional
Text file containing the angle information of the receptor
extracted from the log file.
hessian_file: str, optional
Processed hessian matrix of the receptor.
atom_names_file: str, optional
Text file containing the list of atom names from the fchk file.
bond_parameter_file: str, optional
Text file containing the bond parameters for the receptor
obtained using the Modified Seminario method.
angle_parameter_file: str, optional
Text file containing the angle parameters of the receptor.
charge_parameter_file: str, optional
Text file containing the QM charges of the receptor.
host_qm_pdb: str, optional
PDB file for the receptor's QM region.
functional: str, optional
Exchange/Correlation or hybrid functional to use in the Gaussian
QM calculation.
basis_set: str, optional
Basis set to use for the Gaussian QM calculation.
"""
def __init__(
    self,
    xyz_file="host_qm_coords.xyz",
    coordinate_file="host_qm_coordinates.txt",
    unprocessed_hessian_file="host_qm_unprocessed_hessian.txt",
    bond_list_file="host_qm_bond_list.txt",
    angle_list_file="host_qm_angle_list.txt",
    hessian_file="host_qm_hessian.txt",
    atom_names_file="host_qm_atom_names.txt",
    bond_parameter_file="host_qm_bonds.txt",
    angle_parameter_file="host_qm_angles.txt",
    charge_parameter_file="host_qm_surround_charges.txt",
    host_qm_pdb="host_qm.pdb",
    functional="B3LYP",
    basis_set="6-31G",
):
    """
    Stores the file names and the level of theory used by the
    parameterization steps; no file is touched here.
    """
    # QM job identification
    self.host_qm_pdb = host_qm_pdb
    self.functional = functional
    self.basis_set = basis_set
    # Intermediate files extracted from the Gaussian output
    self.xyz_file = xyz_file
    self.coordinate_file = coordinate_file
    self.atom_names_file = atom_names_file
    self.unprocessed_hessian_file = unprocessed_hessian_file
    self.hessian_file = hessian_file
    self.bond_list_file = bond_list_file
    self.angle_list_file = angle_list_file
    # Final parameter files
    self.bond_parameter_file = bond_parameter_file
    self.angle_parameter_file = angle_parameter_file
    self.charge_parameter_file = charge_parameter_file
def get_xyz(self):
    """
    Saves XYZ file from the formatted checkpoint file.

    The formatted checkpoint file ``<stem>.fchk`` (stem = ``self.host_qm_pdb``
    without its last four characters) is scanned for the
    "Current cartesian coordinates" section and for the block between
    "Atomic numbers" and "Nuclear charges". Coordinates are multiplied by
    0.529 (the same value as the module-level BOHRS_PER_ANGSTROM) and
    written, rounded to three decimals, to ``self.xyz_file``; the
    unrounded coordinates are saved to ``self.coordinate_file``.

    Raises
    ------
    ValueError
        If one of the three section markers is missing from the fchk file
        (previously an UnboundLocalError).
    """
    fchk_file = self.host_qm_pdb[:-4] + ".fchk"
    with open(fchk_file, "r") as f:
        lines = f.readlines()
    coords_begin = None
    numbers_begin = None
    numbers_end = None
    no_coordinates = None
    # Single pass; the last occurrence of each marker wins.
    for index, line in enumerate(lines):
        if "Current cartesian coordinates" in line:
            # First integer on the marker line is the number of values.
            no_coordinates = int(re.findall(r"\d+|\d+.\d+", line)[0])
            coords_begin = index
        if "Atomic numbers" in line:
            numbers_begin = index
        if "Nuclear charges" in line:
            numbers_end = index
    if coords_begin is None or numbers_begin is None or numbers_end is None:
        raise ValueError(
            "Coordinates or atomic numbers section missing in " + fchk_file
        )
    # The section stores five values per line.
    n_rows = int(math.ceil(no_coordinates / 5))
    coordinates_list = [
        value
        for row in lines[coords_begin + 1 : coords_begin + 1 + n_rows]
        for value in row.strip().split()
    ]
    list_coords = [float(x) * float(0.529) for x in coordinates_list]
    numbers = [
        value
        for row in lines[numbers_begin + 1 : numbers_end]
        for value in row.strip().split()
    ]
    N = int(no_coordinates / 3)
    # Gives name for atomic number
    # NOTE(review): the symbol and the x coordinate are written without a
    # separator, so element_list entries presumably carry their own
    # padding - confirm against element_list.
    names = [element_list[int(number) - 1][1] for number in numbers]
    coords = np.zeros((N, 3))
    for i in range(N):
        for j in range(3):
            coords[i][j] = list_coords[3 * i + j]
    # The context manager guarantees the handle is closed even when a
    # write fails part-way through.
    with open(self.xyz_file, "w") as xyz:
        xyz.write(str(N) + "\n \n")
        for i in range(N):
            xyz.write(
                names[i]
                + str(round(coords[i][0], 3))
                + " "
                + str(round(coords[i][1], 3))
                + " "
                + str(round(coords[i][2], 3))
                + "\n"
            )
    np.savetxt(self.coordinate_file, coords, fmt="%s")
def get_unprocessed_hessian(self):
    """
    Saves a text file of the unprocessed hessian matrix from the
    formatted checkpoint file.

    The "Cartesian Force Constants" section of ``<stem>.fchk`` is read
    (five values per line, count taken from the first integer on the
    marker line) and its values are written one per line, unchanged, to
    ``self.unprocessed_hessian_file``.
    """
    fchk_file = self.host_qm_pdb[:-4] + ".fchk"
    with open(fchk_file, "r") as f:
        lines = f.readlines()
    for position, text in enumerate(lines):
        if "Cartesian Force Constants" in text:
            no_hessian = int(re.findall(r"\d+|\d+.\d+", text)[0])
            to_begin = position
    first_row = to_begin + 1
    last_row = first_row + int(math.ceil(no_hessian / 5))
    unprocessed_Hessian = []
    for row in lines[first_row:last_row]:
        unprocessed_Hessian.extend(row.strip().split())
    np.savetxt(
        self.unprocessed_hessian_file, unprocessed_Hessian, fmt="%s",
    )
def get_bond_angles(self):
    """
    Saves a text file containing bonds and angles from the gaussian
    log file.

    The log file ``<stem>.log`` is scanned for the first table whose
    header reads "! Name Definition Value Derivative Info. !". Rows whose
    name starts with "R" are bonds and rows whose name starts with "A" are
    angles; the atom indices inside the parentheses of the definition are
    converted to zero-based indices. Parsing stops at the first row that is
    neither, or that has fewer than three fields (such as the dashed line
    closing the table). Bonds go to ``self.bond_list_file`` and angles to
    ``self.angle_list_file``, one entry per line.
    """
    log_file = self.host_qm_pdb[:-4] + ".log"
    header = "! Name Definition Value Derivative Info. !"
    bond_list = []
    angle_list = []
    # The context manager closes the log file, which was previously left
    # open.
    with open(log_file, "r") as fid:
        for tline in fid:
            # Column padding is collapsed before comparing, so the header
            # is recognised regardless of how many spaces separate the
            # column titles.
            if not (
                len(tline) > 80
                and " ".join(tline[0:81].split()) == header
            ):
                continue
            # Skip the separator line that follows the header.
            next(fid, None)
            for row in fid:
                fields = row.split()
                # The table terminator has no name/definition columns.
                if len(fields) < 3:
                    break
                name = fields[1]
                if name[0] != "R" and name[0] != "A":
                    break
                # Definition looks like "R(1,2)" / "A(1,2,3)"; subtract one
                # because Python indexing starts at 0.
                atoms = [int(k) - 1 for k in fields[2][2:-1].split(",")]
                if name[0] == "R":
                    bond_list.append(atoms)
                else:
                    angle_list.append(atoms)
            # Only the first table is used.
            break
    np.savetxt(self.bond_list_file, bond_list, fmt="%s")
    np.savetxt(self.angle_list_file, angle_list, fmt="%s")
def get_hessian(self):
    """
    Extracts hessian matrix from the unprocessed hessian matrix
    and saves into a new file.

    ``self.unprocessed_hessian_file`` holds the lower triangle of the
    matrix in row-major order. The matrix dimension is the number of
    values announced on the "Current cartesian coordinates" line of
    ``<stem>.fchk`` (rounded down to a multiple of three). The symmetric
    matrix is multiplied by 627.509391 and divided by 0.529 ** 2 (the
    same values as the module-level HARTREE_PER_KCAL_MOL and
    BOHRS_PER_ANGSTROM) and saved to ``self.hessian_file``.

    Raises
    ------
    ValueError
        If the coordinates marker is missing, or if the unprocessed file
        contains fewer values than the lower triangle requires.
    """
    unprocessed_Hessian = np.atleast_1d(
        np.loadtxt(self.unprocessed_hessian_file)
    )
    fchk_file = self.host_qm_pdb[:-4] + ".fchk"
    with open(fchk_file, "r") as f:
        lines = f.readlines()
    no_coordinates = None
    for line in lines:
        if "Current cartesian coordinates" in line:
            no_coordinates = int(re.findall(r"\d+|\d+.\d+", line)[0])
    if no_coordinates is None:
        raise ValueError(
            "No 'Current cartesian coordinates' section in " + fchk_file
        )
    N = int(no_coordinates / 3)
    length_hessian = 3 * N
    # tril_indices enumerates (row, col) pairs row by row, which is the
    # order in which the lower triangle is stored in the file.
    rows, cols = np.tril_indices(length_hessian)
    if unprocessed_Hessian.size < rows.size:
        raise ValueError(
            "Unprocessed hessian has %d values, %d required"
            % (unprocessed_Hessian.size, rows.size)
        )
    lower_triangle = unprocessed_Hessian[: rows.size]
    hessian = np.zeros((length_hessian, length_hessian))
    hessian[rows, cols] = lower_triangle
    hessian[cols, rows] = lower_triangle
    hessian = (hessian * (627.509391)) / (
        0.529 ** 2
    )  # Change from Hartree/bohr to kcal/mol/ang
    np.savetxt(self.hessian_file, hessian, fmt="%s")
def get_atom_names(self):
    """
    Saves a list of atom names from the formatted checkpoint file.

    The values between the "Atomic numbers" and "Nuclear charges" lines of
    ``<stem>.fchk`` are looked up in ``element_list``; each name is the
    stripped second item of the matching entry followed by the atom's
    1-based position. Names are written one per line to
    ``self.atom_names_file``.
    """
    fchk_file = self.host_qm_pdb[:-4] + ".fchk"
    with open(fchk_file, "r") as f:
        lines = f.readlines()
    for position, text in enumerate(lines):
        if "Atomic numbers" in text:
            to_begin = position
        if "Nuclear charges" in text:
            to_end = position
    numbers = []
    for row in lines[to_begin + 1 : to_end]:
        numbers.extend(row.strip().split())
    # Gives name for atomic number
    names = [element_list[int(number) - 1][1] for number in numbers]
    atom_names = [
        name.strip() + str(serial)
        for serial, name in enumerate(names, start=1)
    ]
    np.savetxt(self.atom_names_file, atom_names, fmt="%s")
def get_bond_angle_params(self):
    """
    Saves the bond and angle parameter files obtained from
    the formatted checkpoint file.

    Inputs are the files produced by the other methods of this class:
    coordinates, processed hessian, bond list, angle list and atom names.
    For every bond the force constant is the mean of
    ``force_constant_bond`` evaluated in both atom orders; for every angle
    the force constant and equilibrium angle are the means of
    ``force_angle_constant`` evaluated in both orders, using the scaling
    factors of the Modified Seminario method. All force constants are
    multiplied by the squared vibrational scaling factor returned by
    ``get_vibrational_scaling`` for ``self.functional`` and
    ``self.basis_set``.

    Each line of ``self.bond_parameter_file`` holds the bond name, force
    constant, bond length and the two 1-based atom indices; each line of
    ``self.angle_parameter_file`` holds the angle name, force constant,
    equilibrium angle and the three 1-based atom indices.
    """
    fchk_file = self.host_qm_pdb[:-4] + ".fchk"
    with open(fchk_file, "r") as f:
        lines = f.readlines()
    for line in lines:
        if "Current cartesian coordinates" in line:
            no_coordinates = int(re.findall(r"\d+|\d+.\d+", line)[0])
    N = int(no_coordinates / 3)
    coords = np.loadtxt(self.coordinate_file)
    hessian = np.loadtxt(self.hessian_file)
    bond_list = np.loadtxt(self.bond_list_file, dtype=int)
    # A file with a single bond loads as a 1-D array; restore the row axis
    # so that bond_list[i][k] keeps working.
    if bond_list.ndim == 1 and bond_list.size:
        bond_list = bond_list.reshape(1, -1)
    atom_names = np.loadtxt(self.atom_names_file, dtype=str)
    # Vibrational_scaling takes into account DFT deficities
    # / anharmocity. It does not depend on the bond, so it is evaluated
    # once here; this also makes it available to the angle section when
    # the bond list is empty.
    vibrational_scaling = get_vibrational_scaling(
        functional=self.functional, basis_set=self.basis_set
    )
    vibrational_scaling_squared = vibrational_scaling ** 2
    # Find bond lengths
    bond_lengths = np.zeros((N, N))
    for i in range(0, N):
        for j in range(0, N):
            diff_i_j = np.array(coords[i, :]) - np.array(coords[j, :])
            bond_lengths[i][j] = np.linalg.norm(diff_i_j)
    # Eigen-decomposition of every 3x3 atom-pair block of the hessian
    eigenvectors = np.empty((3, 3, N, N), dtype=complex)
    eigenvalues = np.empty((N, N, 3), dtype=complex)
    for i in range(0, N):
        for j in range(0, N):
            partial_hessian = hessian[
                (i * 3) : ((i + 1) * 3), (j * 3) : ((j + 1) * 3)
            ]
            [a, b] = np.linalg.eig(partial_hessian)
            eigenvalues[i, j, :] = a
            eigenvectors[:, :, i, j] = b
    # Modified Seminario method to find the bond parameters
    # and print them to file
    k_b = np.zeros(len(bond_list))
    bond_length_list = np.zeros(len(bond_list))
    with open(self.bond_parameter_file, "w") as file_bond:
        for i in range(0, len(bond_list)):
            atom_a = bond_list[i][0]
            atom_b = bond_list[i][1]
            AB = force_constant_bond(
                atom_a, atom_b, eigenvalues, eigenvectors, coords,
            )
            BA = force_constant_bond(
                atom_b, atom_a, eigenvalues, eigenvectors, coords,
            )
            # Order of bonds sometimes causes slight differences,
            # find the mean
            k_b[i] = np.real((AB + BA) / 2)
            k_b[i] = k_b[i] * vibrational_scaling_squared
            bond_length_list[i] = bond_lengths[atom_a][atom_b]
            file_bond.write(
                atom_names[atom_a] + "-" + atom_names[atom_b] + " "
            )
            file_bond.write(
                str("%#.5g" % k_b[i])
                + " "
                + str("%#.4g" % bond_length_list[i])
                + " "
                + str(atom_a + 1)
                + " "
                + str(atom_b + 1)
            )
            file_bond.write("\n")
    angle_list = np.loadtxt(self.angle_list_file, dtype=int)
    # Same single-row guard as for the bond list (e.g. a three-atom
    # system has exactly one angle).
    if angle_list.ndim == 1 and angle_list.size:
        angle_list = angle_list.reshape(1, -1)
    k_theta = np.zeros(len(angle_list))
    theta_0 = np.zeros(len(angle_list))
    # Connectivity information for Modified Seminario Method.
    # central_atoms_angles[i] lists, for every angle whose central atom is
    # i, the two outer atoms in both orders plus the position of the angle
    # in angle_list.
    central_atoms_angles = []
    for i in range(0, len(coords)):
        central_atoms_angles.append([])
        for j in range(0, len(angle_list)):
            if i == angle_list[j][1]:
                # For angle ABC, atoms A C are written to array
                central_atoms_angles[i].append(
                    [angle_list[j][0], angle_list[j][2], j]
                )
                # For angle ABC, atoms C A are written to array
                central_atoms_angles[i].append(
                    [angle_list[j][2], angle_list[j][0], j]
                )
    # Sort rows by atom number
    for i in range(0, len(coords)):
        central_atoms_angles[i] = sorted(
            central_atoms_angles[i], key=itemgetter(0)
        )
    # Find normals u_PA for each angle
    unit_PA_all_angles = []
    for i in range(0, len(central_atoms_angles)):
        unit_PA_all_angles.append([])
        for j in range(0, len(central_atoms_angles[i])):
            # For the angle at central_atoms_angles[i][j,:] the
            # corresponding u_PA value is found for the plane ABC and
            # bond AB, where ABC corresponds to the order of the
            # arguments. This is why the reverse order was also added
            unit_PA_all_angles[i].append(
                u_PA_from_angles(
                    central_atoms_angles[i][j][0],
                    i,
                    central_atoms_angles[i][j][1],
                    coords,
                )
            )
    # Finds the contributing factors from the other angle terms.
    # Each entry is [scaling factor, position in angle list].
    scaling_factor_all_angles = []
    for i in range(0, len(central_atoms_angles)):
        entries = central_atoms_angles[i]
        scaling_factor_all_angles.append([])
        for j in range(0, len(entries)):
            n = 1
            m = 1
            additional_contributions = 0
            # Goes through the list of angles with the same central
            # atom and computes the term needed for the modified
            # Seminario method.
            # Forwards direction, finds the same bonds with the central
            # atom i
            while ((j + n) < len(entries)) and entries[j][0] == entries[
                j + n
            ][0]:
                additional_contributions = (
                    additional_contributions
                    + (
                        abs(
                            np.dot(
                                unit_PA_all_angles[i][j][:],
                                unit_PA_all_angles[i][j + n][:],
                            )
                        )
                    )
                    ** 2
                )
                n = n + 1
            # Backwards direction, finds the same bonds with the central
            # atom i
            while ((j - m) >= 0) and entries[j][0] == entries[j - m][0]:
                additional_contributions = (
                    additional_contributions
                    + (
                        abs(
                            np.dot(
                                unit_PA_all_angles[i][j][:],
                                unit_PA_all_angles[i][j - m][:],
                            )
                        )
                    )
                    ** 2
                )
                m = m + 1
            if n != 1 or m != 1:
                # Finds the mean value of the additional contribution; to
                # change to the normal Seminario method comment out the
                # "+" part
                factor = 1 + (additional_contributions / (m + n - 2))
            else:
                factor = 1
            scaling_factor_all_angles[i].append([factor, entries[j][2]])
    # Orders the scaling factors according to the angle list
    scaling_factors_angles_list = []
    for i in range(0, len(angle_list)):
        scaling_factors_angles_list.append([])
    for i in range(0, len(central_atoms_angles)):
        for j in range(0, len(central_atoms_angles[i])):
            scaling_factors_angles_list[
                scaling_factor_all_angles[i][j][1]
            ].append(scaling_factor_all_angles[i][j][0])
    # Finds the angle force constants with the scaling factors
    # included for each angle
    with open(self.angle_parameter_file, "w") as file_angle:
        for i in range(0, len(angle_list)):
            atom_a = angle_list[i][0]
            atom_b = angle_list[i][1]
            atom_c = angle_list[i][2]
            # Ensures that there is no difference when the
            # ordering is changed
            [AB_k_theta, AB_theta_0] = force_angle_constant(
                atom_a,
                atom_b,
                atom_c,
                bond_lengths,
                eigenvalues,
                eigenvectors,
                coords,
                scaling_factors_angles_list[i][0],
                scaling_factors_angles_list[i][1],
            )
            [BA_k_theta, BA_theta_0] = force_angle_constant(
                atom_c,
                atom_b,
                atom_a,
                bond_lengths,
                eigenvalues,
                eigenvectors,
                coords,
                scaling_factors_angles_list[i][1],
                scaling_factors_angles_list[i][0],
            )
            k_theta[i] = (AB_k_theta + BA_k_theta) / 2
            theta_0[i] = (AB_theta_0 + BA_theta_0) / 2
            # Vibrational_scaling takes into account DFT
            # deficities / anharmonicity
            k_theta[i] = k_theta[i] * vibrational_scaling_squared
            file_angle.write(
                atom_names[atom_a]
                + "-"
                + atom_names[atom_b]
                + "-"
                + atom_names[atom_c]
                + " "
            )
            file_angle.write(
                str("%#.4g" % k_theta[i])
                + " "
                + str("%#.4g" % theta_0[i])
                + " "
                + str(atom_a + 1)
                + " "
                + str(atom_b + 1)
                + " "
                + str(atom_c + 1)
            )
            file_angle.write("\n")
def get_charges(self):
    """
    Saves the atomic charges in a text file obtained from
    the Gaussian log file.

    The log file name is built from ``self.host_qm_pdb`` by replacing its
    last four characters with ".log". The rows that lie between the last
    "Fitting point charges to electrostatic potential" line (skipping the
    four lines that start at that banner) and the last
    " Sum of ESP charges =" line are split on whitespace. The second
    field of each row is stored as the atom and the third field as the
    charge; the pairs are written, space separated and without header or
    index, to ``self.charge_parameter_file``.

    Raises
    ------
    ValueError
        If either marker line is absent from the log file (previously this
        surfaced as an UnboundLocalError).
    """
    log_file = self.host_qm_pdb[:-4] + ".log"
    with open(log_file, "r") as f:
        lines = f.readlines()
    to_begin = None
    to_end = None
    # The last occurrence of each marker wins, as in a log that holds
    # several ESP fits.
    for index, line in enumerate(lines):
        if "Fitting point charges to electrostatic potential" in line:
            to_begin = index
        if " Sum of ESP charges =" in line:
            to_end = index
    if to_begin is None or to_end is None:
        raise ValueError(
            "No ESP charge section found in Gaussian log file " + log_file
        )
    charge_list = [row.strip().split() for row in lines[to_begin + 4 : to_end]]
    atom_list = [fields[1] for fields in charge_list]
    charge_list_value = [fields[2] for fields in charge_list]
    data_tuples = list(zip(atom_list, charge_list_value))
    df_charge = pd.DataFrame(data_tuples, columns=["Atom", "Charge"])
    df_charge.to_csv(
        self.charge_parameter_file, index=False, header=False, sep=" ",
    )
[docs]class GuestAmberXMLAmber:
"""
A class used to generate a template force field XML file for the ligand
in order to regenerate the reparameterised force field XML file.
This class contain methods to generate a template XML force field through
openforcefield. XML template generation can be obtained through different
file formats such as PDB, SDF, and SMI. Methods support charged ligands as
well. Re-parameterized XML force field files are then generated from the
template files. Different energy components such as the bond, angle,
torsional and non-bonded energies are computed for the non-reparametrized
and the reparameterized force fields. Difference between the
non-reparameterized and reparameterized force field energies can then be
analyzed.
...
Attributes
----------
charge : int
Charge of the ligand.
num_charge_atoms: int, optional
Number of charged atoms in the molecule.
charge_atom_1: int, optional
Charge on the first charged atom.
index_charge_atom_1: int, optional
Index of the first charged atom.
system_pdb: str, optional
Ligand PDB file with atom numbers beginning from 1.
system_mol2: str, optional
Ligand Mol2 file obtained from PDB file.
system_in: str, optional
Prepi file as required by antechamber.
system_frcmod: str, optional
FRCMOD file as required by antechamber.
prmtop_system : str, optional
Topology file obtained from the ligand PDB.
inpcrd_system : str, optional
Coordinate file obtained from the ligand PDB using the
command saveamberparm.
system_leap : str, optional
Amber generated leap file for generating and saving topology
and coordinate files.
system_xml: str, optional
Serialized XML force field file of the ligand.
system_smi: str, optional
Ligand SMILES format file.
system_sdf: str, optional
Ligand SDF (structure-data) format file.
system_init_sdf: str, optional
Ligand SDF (structure-data) format file. This file will be
generated only if the ligand is charged.
index_charge_atom_2: int, optional
Index of the second charged atom of the ligand.
charge_atom_2: int, optional
Charge on the second charged atom of the ligand.
charge_parameter_file: str, optional
File containing the charges of ligand atoms and their corresponding
atoms.
system_qm_pdb: str, optional
Ligand PDB file with atom numbers beginning from 1.
bond_parameter_file: str, optional
Text file containing the bond parameters for the ligand.
angle_parameter_file: str, optional
Text file containing the angle parameters of the ligand.
system_qm_params_file: str, optional
A text file containing the QM obtained parameters for the
ligand.
reparameterised_intermediate_system_xml_file: str, optional
XML force field file with bond and angle parameter lines replaced by
corresponding values obtained from the QM calculations.
system_xml_non_bonded_file: str, optional
Text file to write the NonBondedForce Charge Parameters from
the non-parameterised system XML file.
system_xml_non_bonded_reparams_file: str, optional
Text file containing the non-bonded parameters parsed from the
XML force field file.
reparameterised_system_xml_file: str, optional
Reparameterized force field XML file obtained using
openforcefield.
non_reparameterised_system_xml_file: str, optional
Non-reparameterized force field XML file obtained using
openforcefield.
prmtop_system_non_params: str, optional
Amber generated topology file saved from the non-reparameterized
force field XML file for the ligand.
inpcrd_system_non_params: str, optional
Amber generated coordinate file saved from the non-reparameterized
force field XML file for the ligand.
prmtop_system_params: str, optional
Amber generated topology file saved from the reparameterized
force field XML file for the ligand.
inpcrd_system_params: str, optional
Amber generated coordinate file saved from the reparameterized
force field XML file for the ligand.
load_topology: str, optional
Argument to specify how to load the topology. Can either be "openmm"
or "parmed".
"""
def __init__(
    self,
    charge=0,
    # TODO: some of these variables are ints, and shouldn't be initialized as strings
    num_charge_atoms="",
    charge_atom_1="",
    index_charge_atom_1="",
    system_pdb="guest_init_II.pdb",
    system_mol2="guest.mol2",
    system_in="guest.in",
    system_frcmod="guest.frcmod",
    prmtop_system="guest.prmtop",
    inpcrd_system="guest.inpcrd",
    system_leap="guest.leap",
    system_xml="guest_init.xml",
    system_smi="guest.smi",
    system_sdf="guest.sdf",
    system_init_sdf="guest_init.sdf",
    index_charge_atom_2=" ",
    charge_atom_2=" ",
    charge_parameter_file="guest_qm_surround_charges.txt",
    system_qm_pdb="guest_init_II.pdb",
    bond_parameter_file="guest_bonds.txt",
    angle_parameter_file="guest_angles.txt",
    system_qm_params_file="guest_qm_params.txt",
    reparameterised_intermediate_system_xml_file="guest_intermediate_reparameterised.xml",
    system_xml_non_bonded_file="guest_xml_non_bonded.txt",
    system_xml_non_bonded_reparams_file="guest_xml_non_bonded_reparams.txt",
    reparameterised_system_xml_file="guest_reparameterised.xml",
    non_reparameterised_system_xml_file="guest_init.xml",
    prmtop_system_non_params="guest_non_params.prmtop",
    inpcrd_system_non_params="guest_non_params.inpcrd",
    prmtop_system_params="guest_params.prmtop",
    inpcrd_system_params="guest_params.inpcrd",
    load_topology="openmm",
):
    """
    Stores every setting on the instance under the name of its
    parameter; nothing is read or written here.
    """
    # Charge description of the ligand
    self.charge = charge
    self.num_charge_atoms = num_charge_atoms
    self.index_charge_atom_1 = index_charge_atom_1
    self.charge_atom_1 = charge_atom_1
    self.index_charge_atom_2 = index_charge_atom_2
    self.charge_atom_2 = charge_atom_2
    # Structure files in the various formats
    self.system_pdb = system_pdb
    self.system_qm_pdb = system_qm_pdb
    self.system_mol2 = system_mol2
    self.system_smi = system_smi
    self.system_sdf = system_sdf
    self.system_init_sdf = system_init_sdf
    # Antechamber / tleap files
    self.system_in = system_in
    self.system_frcmod = system_frcmod
    self.system_leap = system_leap
    self.prmtop_system = prmtop_system
    self.inpcrd_system = inpcrd_system
    # QM-derived parameter files
    self.charge_parameter_file = charge_parameter_file
    self.bond_parameter_file = bond_parameter_file
    self.angle_parameter_file = angle_parameter_file
    self.system_qm_params_file = system_qm_params_file
    # Force field XML files and the text files parsed from them
    self.system_xml = system_xml
    self.non_reparameterised_system_xml_file = (
        non_reparameterised_system_xml_file
    )
    self.reparameterised_intermediate_system_xml_file = (
        reparameterised_intermediate_system_xml_file
    )
    self.reparameterised_system_xml_file = reparameterised_system_xml_file
    self.system_xml_non_bonded_file = system_xml_non_bonded_file
    self.system_xml_non_bonded_reparams_file = (
        system_xml_non_bonded_reparams_file
    )
    # Amber topology / coordinate files saved from the XML files
    self.prmtop_system_non_params = prmtop_system_non_params
    self.inpcrd_system_non_params = inpcrd_system_non_params
    self.prmtop_system_params = prmtop_system_params
    self.inpcrd_system_params = inpcrd_system_params
    # How the topology is loaded
    self.load_topology = load_topology
def generate_xml_antechamber(self):
    """
    Generates an XML forcefield file from the PDB file through antechamber.

    Steps, each run through the shell: obabel converts the PDB to mol2,
    antechamber writes a prepi file with bcc charges for the net charge
    ``self.charge``, parmchk2 writes the frcmod file, temporary
    antechamber files are removed, and tleap (driven by a generated leap
    script) saves the Amber topology and coordinates. The Amber files
    are then loaded with parmed and the resulting system is serialized to
    ``self.system_xml``.
    """
    preparation_commands = (
        "obabel -ipdb {} -omol2 -O {}".format(
            self.system_pdb, self.system_mol2
        ),
        "antechamber -i {} -fi mol2 -o {} -fo prepi -c bcc -nc {}".format(
            self.system_mol2, self.system_in, str(self.charge)
        ),
        "parmchk2 -i {} -o {} -f prepi -a Y".format(
            self.system_in, self.system_frcmod
        ),
        "rm -rf ANTECHAMBER* leap.log sqm* ATOMTYPE.INF PREP.INF NEWPDB.PDB",
    )
    for shell_command in preparation_commands:
        os.system(shell_command)
    # Leap script; the first line is a single blank character.
    leap_script = [
        " ",
        "loadamberprep " + self.system_in,
        "loadamberparams " + self.system_frcmod,
        "pdb = loadpdb " + self.system_pdb,
        "saveamberparm pdb " + self.prmtop_system + " " + self.inpcrd_system,
        "quit",
    ]
    with open(self.system_leap, "w") as leap_file:
        for leap_line in leap_script:
            leap_file.write(leap_line + "\n")
    os.system("tleap -f " + self.system_leap)
    amber_structure = parmed.load_file(
        self.prmtop_system, self.inpcrd_system
    )
    system = amber_structure.createSystem()
    with open(self.system_xml, "w") as xml_file:
        xml_file.write(simtk.openmm.XmlSerializer.serialize(system))
def generate_xml_from_pdb_smi(self):
    """
    Generates an XML forcefield file from the SMILES file through
    openforcefield.

    A molecule is built from ``self.system_smi``, an OpenMM system is
    created with the "openff_unconstrained-1.0.0.offxml" force field and
    the serialized system is written to ``self.system_xml``.
    """
    ligand = Molecule(self.system_smi)
    smirnoff = ForceField("openff_unconstrained-1.0.0.offxml")
    ligand_topology = ligand.to_topology()
    system = smirnoff.create_openmm_system(ligand_topology)
    pdb_structure = simtk.openmm.app.PDBFile(self.system_pdb)
    # The parmed structure itself is not used afterwards; the call is
    # kept so that the PDB topology is still combined with the system as
    # before.
    parmed.openmm.load_topology(
        pdb_structure.topology, system, xyz=pdb_structure.positions
    )
    with open(self.system_xml, "w") as xml_file:
        xml_file.write(simtk.openmm.XmlSerializer.serialize(system))
def generate_xml_from_pdb_sdf(self):
    """
    Generates an XML forcefield file from the SDF file through
    openforcefield.

    obabel first converts ``self.system_pdb`` to ``self.system_sdf``. A
    molecule is built from that SDF file, an OpenMM system is created
    with the "openff_unconstrained-1.0.0.offxml" force field and the
    serialized system is written to ``self.system_xml``.
    """
    os.system(
        "obabel -ipdb {} -osdf -O {}".format(
            self.system_pdb, self.system_sdf
        )
    )
    ligand = Molecule(self.system_sdf)
    smirnoff = ForceField("openff_unconstrained-1.0.0.offxml")
    ligand_topology = ligand.to_topology()
    system = smirnoff.create_openmm_system(ligand_topology)
    pdb_structure = simtk.openmm.app.PDBFile(self.system_pdb)
    # The parmed structure itself is not used afterwards; the call is
    # kept so that the PDB topology is still combined with the system as
    # before.
    parmed.openmm.load_topology(
        pdb_structure.topology, system, xyz=pdb_structure.positions
    )
    with open(self.system_xml, "w") as xml_file:
        xml_file.write(simtk.openmm.XmlSerializer.serialize(system))
def generate_xml_from_charged_pdb_sdf(self):
    """
    Generates an XML forcefield file for a singly charged ligand molecule
    from the SDF file through openforcefield.

    obabel converts ``self.system_pdb`` to ``self.system_init_sdf``. That
    file is copied to ``self.system_sdf`` without its last two lines, and
    a charge record (number of charged atoms, index of the charged atom,
    its charge), an end-of-molecule record and the "$$$$" terminator are
    appended. The molecule built from the new SDF file is parameterized
    with the "openff_unconstrained-1.0.0.offxml" force field and the
    serialized system is written to ``self.system_xml``.
    """
    os.system(
        "obabel -ipdb {} -osdf -O {}".format(
            self.system_pdb, self.system_init_sdf
        )
    )
    with open(self.system_init_sdf, "r") as initial_sdf:
        sdf_lines = initial_sdf.readlines()
    # Drop the two trailing lines of the obabel output; they are rewritten
    # below together with the charge record.
    kept_lines = sdf_lines[:-2]
    with open(self.system_sdf, "w+") as out:
        out.writelines(kept_lines)
        charge_record = "M CHG {} {} {}\n".format(
            str(self.num_charge_atoms),
            str(self.index_charge_atom_1),
            str(self.charge_atom_1),
        )
        out.write(charge_record)
        out.write("M END" + "\n")
        out.write("$$$$")
    ligand = Molecule(self.system_sdf)
    smirnoff = ForceField("openff_unconstrained-1.0.0.offxml")
    ligand_topology = ligand.to_topology()
    system = smirnoff.create_openmm_system(ligand_topology)
    pdb_structure = simtk.openmm.app.PDBFile(self.system_pdb)
    # The parmed structure itself is not used afterwards; the call is
    # kept so that the PDB topology is still combined with the system as
    # before.
    parmed.openmm.load_topology(
        pdb_structure.topology, system, xyz=pdb_structure.positions
    )
    with open(self.system_xml, "w") as xml_file:
        xml_file.write(simtk.openmm.XmlSerializer.serialize(system))
def generate_xml_from_doubly_charged_pdb_sdf(self):
    """
    Generates an XML forcefield file for a ligand molecule with two charged
    atoms from the SDF file through openforcefield.

    ``self.system_pdb`` is converted to ``self.system_init_sdf`` with
    ``obabel``. The last two lines of that file are dropped and replaced by
    a charge line built from ``self.num_charge_atoms`` and the two
    index/charge pairs (``self.index_charge_atom_1``/``self.charge_atom_1``
    and ``self.index_charge_atom_2``/``self.charge_atom_2``), followed by
    the end-of-molecule lines; the result is written to ``self.system_sdf``.
    That SDF is parameterized with ``openff_unconstrained-1.0.0.offxml`` and
    the system is serialized to ``self.system_xml``.

    Raises
    ------
    FileNotFoundError
        If the ``obabel`` executable cannot be found.
    subprocess.CalledProcessError
        If ``obabel`` exits with a non-zero status.
    """
    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters intact; check=True surfaces a failed conversion.
    sp.run(
        [
            "obabel",
            "-ipdb",
            self.system_pdb,
            "-osdf",
            "-O",
            self.system_init_sdf,
        ],
        check=True,
    )
    with open(self.system_init_sdf, "r") as f1:
        filedata = f1.readlines()
    # Drop the two trailing lines of the converted file (presumably its
    # "M END" and "$$$$" terminators) so the charge line can be inserted
    # before freshly written terminators.
    filedata = filedata[:-2]
    # NOTE(review): MDL V2000 property lines are conventionally fixed-width
    # ("M  CHG", "M  END"); confirm the spacing below is what the toolkit
    # expects.
    charge_line = (
        f"M CHG {self.num_charge_atoms} "
        f"{self.index_charge_atom_1} {self.charge_atom_1} "
        f"{self.index_charge_atom_2} {self.charge_atom_2}\n"
    )
    with open(self.system_sdf, "w+") as out:
        out.writelines(filedata)
        out.write(charge_line)
        out.write("M END\n")
        out.write("$$$$")
    off_molecule = Molecule(self.system_sdf)
    force_field = ForceField("openff_unconstrained-1.0.0.offxml")
    system = force_field.create_openmm_system(off_molecule.to_topology())
    pdbfile = simtk.openmm.app.PDBFile(self.system_pdb)
    # The returned structure is unused; the call is kept so the PDB
    # topology/positions are still loaded against the new system
    # (presumably as a consistency check -- confirm before removing).
    parmed.openmm.load_topology(
        pdbfile.topology, system, xyz=pdbfile.positions
    )
    with open(self.system_xml, "w") as f:
        f.write(simtk.openmm.XmlSerializer.serialize(system))
def write_system_params(self):
    """
    Saves the parameters obtained from the QM log files in a text file.

    Charges, bond parameters and angle parameters are read from
    ``self.charge_parameter_file``, ``self.bond_parameter_file`` and
    ``self.angle_parameter_file`` (whitespace-delimited, no header row) and
    written to ``self.system_qm_params_file`` as ``<Bond .../>``,
    ``<Angle .../>`` and ``<Particle .../>`` lines. Each group is enclosed
    by a "Begin writing the ... Parameters" / "Finish writing the ...
    Parameters" pair of marker lines.

    Atom indices are taken from the ATOM records of ``self.system_qm_pdb``
    (``atom_number - 1``); the atom columns of the bond and angle tables are
    shifted by ``- 1 + min(atom index)``.

    Raises
    ------
    ValueError
        If ``self.system_qm_pdb`` contains no ATOM records.
    IndexError
        If the charge file has more rows than the PDB has ATOM records.
    """

    def _read_table(path, columns):
        # Whitespace-delimited table without a header row.
        table = pd.read_csv(path, header=None, delimiter=r"\s+")
        table.columns = columns
        return table

    def _column(table, name):
        return table[name].values.tolist()

    # Charges from QM files
    df_charges = _read_table(self.charge_parameter_file, ["atom", "charges"])
    qm_charges = [round(num, 6) for num in _column(df_charges, "charges")]

    # Atom indices of the QM region; the PDB is parsed once and reused for
    # the bond and angle sections.
    ppdb = PandasPdb()
    ppdb.read_pdb(self.system_qm_pdb)
    atom_name_list = [
        number - 1
        for number in ppdb.df["ATOM"]["atom_number"].values.tolist()
    ]
    if not atom_name_list:
        raise ValueError(
            "No ATOM records found in " + str(self.system_qm_pdb)
        )
    # Hoisted out of the comprehensions below: it is the same for every row.
    first_atom_index = min(atom_name_list)

    def _shift(indices):
        return [x - 1 + first_atom_index for x in indices]

    # Bond Parameters from QM files
    df_bonds = _read_table(
        self.bond_parameter_file,
        ["bond", "k_bond", "bond_length", "bond_1", "bond_2"],
    )
    bond_1_list = _shift(_column(df_bonds, "bond_1"))
    bond_2_list = _shift(_column(df_bonds, "bond_2"))
    # kcal/mol * A^2 to kJ/mol * nm^2
    k_bond_list = [
        round(i * KCAL_MOL_PER_KJ_MOL * ANGSTROMS_PER_NM**2, 10)
        for i in _column(df_bonds, "k_bond")
    ]
    # TODO: units here? Presumably Angstrom to nm; confirm and use the
    # module constant instead of the literal.
    bond_length_list = [
        round(i / 10.00, 6) for i in _column(df_bonds, "bond_length")
    ]

    # Angle Parameters from QM files
    df_angles = _read_table(
        self.angle_parameter_file,
        [
            "angle",
            "k_angle",
            "angle_degrees",
            "angle_1",
            "angle_2",
            "angle_3",
        ],
    )
    angle_1_list = _shift(_column(df_angles, "angle_1"))
    angle_2_list = _shift(_column(df_angles, "angle_2"))
    angle_3_list = _shift(_column(df_angles, "angle_3"))
    # kcal/mol * radian^2 to kJ/mol * radian^2
    k_angle_list = [
        round(i * KCAL_MOL_PER_KJ_MOL, 6)
        for i in _column(df_angles, "k_angle")
    ]
    # Degrees to radians.
    angle_list = [
        round(i * RADIANS_PER_DEGREE, 6)
        for i in _column(df_angles, "angle_degrees")
    ]

    # The context manager guarantees the file is closed even if a write
    # fails part-way through.
    with open(self.system_qm_params_file, "w") as xml:
        xml.write("Begin writing the Bond Parameters" + "\n")
        for length, k_bond, p1, p2 in zip(
            bond_length_list, k_bond_list, bond_1_list, bond_2_list
        ):
            xml.write(
                f' <Bond d="{length}" k="{k_bond}" p1="{p1}" p2="{p2}"/>\n'
            )
        xml.write("Finish writing the Bond Parameters" + "\n")
        xml.write("Begin writing the Angle Parameters" + "\n")
        for angle, k_angle, p1, p2, p3 in zip(
            angle_list, k_angle_list, angle_1_list, angle_2_list, angle_3_list
        ):
            xml.write(
                f' <Angle a="{angle}" k="{k_angle}" '
                f'p1="{p1}" p2="{p2}" p3="{p3}"/>\n'
            )
        xml.write("Finish writing the Angle Parameters" + "\n")
        xml.write("Begin writing the Charge Parameters" + "\n")
        # Indexed (not zipped) on purpose: a charge without a matching atom
        # must raise rather than be dropped silently.
        for i, charge in enumerate(qm_charges):
            xml.write(
                f'<Particle q="{charge}" eps="0.0" sig="0.0" '
                f'atom="{atom_name_list[i]}"/>\n'
            )
        xml.write("Finish writing the Charge Parameters" + "\n")
def write_reparameterised_system_xml(self):
    """
    Writes a reparameterised XML force field file for the ligand.

    Steps, in order:

    1. Read the bond and angle lines from ``self.system_qm_params_file``
       and, for every ordering of their atom indices, look the
       ``p1="..." p2="..."[ p3="..."]/>`` pattern up in ``self.system_xml``
       with ``search_in_file``. The first hit of each successful search is
       replaced by the QM line; the result is written to
       ``self.reparameterised_intermediate_system_xml_file``.
    2. Copy the non-bonded particle lines of ``self.system_xml`` (from 4
       lines after the last ``<GlobalParameters/>`` line up to 1 line
       before the last ``<Exceptions>`` line) to
       ``self.system_xml_non_bonded_file``.
    3. For every charge line of the QM parameter file, rebuild the particle
       line at the given atom index with the original ``eps``/``sig`` and
       the QM charge, and write the updated particle lines to
       ``self.system_xml_non_bonded_reparams_file``.
    4. Splice the updated particle lines into the intermediate XML and
       write ``self.reparameterised_system_xml_file``.

    Raises
    ------
    ValueError
        If a section marker or an expected attribute is missing.
    """

    def _read_lines(path):
        with open(path, "r") as handle:
            return handle.readlines()

    def _write_lines(path, lines):
        with open(path, "w") as handle:
            handle.writelines(lines)

    def _marker_indices(lines, begin_marker, end_marker):
        # Index of the LAST line containing each marker. Failing loudly here
        # prevents indices found for a previous section from being reused.
        begin = end = None
        for index, line in enumerate(lines):
            if begin_marker in line:
                begin = index
            if end_marker in line:
                end = index
        if begin is None or end is None:
            raise ValueError(
                "Markers %r / %r not found" % (begin_marker, end_marker)
            )
        return begin, end

    def _attribute(line, name):
        # Read an attribute by name instead of by position among all numbers
        # in the line, so values such as "1e-05" are parsed correctly.
        match = re.search(r'\b' + re.escape(name) + r'="([^"]*)"', line)
        if match is None:
            raise ValueError(
                "Attribute %r not found in line %r" % (name, line)
            )
        return match.group(1)

    def _find_replacements(param_lines, atom_count):
        # For each QM line, search the XML for every ordering of its atom
        # indices and pair each non-empty search result with the QM line.
        names = ["p%d" % position for position in range(1, atom_count + 1)]
        replacements = []
        for param_line in param_lines:
            atoms = [_attribute(param_line, name) for name in names]
            for ordering in itertools.permutations(atoms):
                pattern = (
                    " ".join(
                        f'{name}="{atom}"'
                        for name, atom in zip(names, ordering)
                    )
                    + "/>"
                )
                hits = search_in_file(file=self.system_xml, word=pattern)
                if hits != []:
                    replacements.append((hits, param_line))
        return replacements

    lines_params = _read_lines(self.system_qm_params_file)

    # Bond Parameters
    begin, end = _marker_indices(
        lines_params,
        "Begin writing the Bond Parameters",
        "Finish writing the Bond Parameters",
    )
    index_search_replace_bond = _find_replacements(
        lines_params[begin + 1 : end], 2
    )

    # Angle Parameters
    begin, end = _marker_indices(
        lines_params,
        "Begin writing the Angle Parameters",
        "Finish writing the Angle Parameters",
    )
    index_search_replace_angle = _find_replacements(
        lines_params[begin + 1 : end], 3
    )

    # Replace the matched bond lines first, then the matched angle lines.
    xml_lines = _read_lines(self.system_xml)
    intermediate_lines = list(xml_lines)
    for hits, param_line in (
        index_search_replace_bond + index_search_replace_angle
    ):
        # hits[0][0] is used as a 1-based line number of the first hit
        # (presumably what search_in_file reports -- verify against it).
        intermediate_lines[hits[0][0] - 1] = param_line
    _write_lines(
        self.reparameterised_intermediate_system_xml_file, intermediate_lines
    )

    # Charge Parameters
    begin, end = _marker_indices(
        lines_params,
        "Begin writing the Charge Parameters",
        "Finish writing the Charge Parameters",
    )
    charge_params = lines_params[begin + 1 : end]
    non_bonded_index = [int(_attribute(k, "atom")) for k in charge_params]
    charge_for_index = [float(_attribute(k, "q")) for k in charge_params]

    # Non-bonded particle lines of the unmodified XML.
    begin, end = _marker_indices(
        xml_lines, "<GlobalParameters/>", "<Exceptions>"
    )
    nonbond_params = xml_lines[begin + 4 : end - 1]
    _write_lines(self.system_xml_non_bonded_file, nonbond_params)

    # Build the replacement particle lines; if an atom index occurs more
    # than once, the first occurrence wins.
    replacement_by_index = {}
    for atom_index, charge in zip(non_bonded_index, charge_for_index):
        particle_line = nonbond_params[atom_index]
        eps = float(_attribute(particle_line, "eps"))
        sig = float(_attribute(particle_line, "sig"))
        replacement_by_index.setdefault(
            atom_index,
            f' <Particle eps="{eps}" q="{charge}" sig="{sig}"/>' + "\n",
        )
    lines_non_bonded_reparams = [
        replacement_by_index.get(index, line)
        for index, line in enumerate(nonbond_params)
    ]
    _write_lines(
        self.system_xml_non_bonded_reparams_file, lines_non_bonded_reparams
    )
    # Sanity check: the rewritten file has as many lines as the original
    # non-bonded block.
    print(
        len(_read_lines(self.system_xml_non_bonded_reparams_file))
        == len(nonbond_params)
    )

    # Splice the updated particle lines into the intermediate XML.
    # TODO: implement function(s) to read certain types of files. DRY principle
    begin, end = _marker_indices(
        intermediate_lines, "<GlobalParameters/>", "<Exceptions>"
    )
    _write_lines(
        self.reparameterised_system_xml_file,
        intermediate_lines[: begin + 4]
        + lines_non_bonded_reparams
        + intermediate_lines[end - 1 :],
    )
def save_amber_params_non_qm_charges(self):
    """
    Saves amber generated topology files for the ligand
    without the QM charges.

    Two passes are run, each printing a table that compares the energy
    terms computed from the XML system with those computed from the saved
    topology/coordinate pair:

    1. ``self.non_reparameterised_system_xml_file`` is saved to
       ``self.prmtop_system_non_params`` /
       ``self.inpcrd_system_non_params``.
    2. ``self.reparameterised_intermediate_system_xml_file`` is saved to
       ``self.prmtop_system_params`` / ``self.inpcrd_system_params``.

    Raises
    ------
    ValueError
        If ``self.load_topology`` is neither "parmed" nor "openmm".
    """
    energy_terms = [
        "HarmonicBondForce",
        "HarmonicAngleForce",
        "PeriodicTorsionForce",
        "NonbondedForce",
    ]

    def _load_pdb_topology():
        # Topology of the system PDB, read by the configured loader.
        if self.load_topology == "parmed":
            return parmed.load_file(self.system_pdb, structure=True).topology
        if self.load_topology == "openmm":
            return simtk.openmm.app.PDBFile(self.system_pdb).topology
        raise ValueError(
            'load_topology must be "parmed" or "openmm", got '
            + repr(self.load_topology)
        )

    def _energy_frame(decomposition, column):
        # Flatten the decomposition entries once, convert them with the
        # module's list_to_dict helper, and tabulate the four energy terms.
        flattened = [item for elem in decomposition for item in list(elem)]
        energy_by_term = list_to_dict(flattened)
        values = [energy_by_term.get(term) for term in energy_terms]
        frame = pd.DataFrame(
            list(zip(energy_terms, values)),
            columns=["Energy_term", column],
        )
        return frame.set_index("Energy_term")

    def _save_and_compare(xml_file, prmtop_file, inpcrd_file, label):
        # Save prmtop/inpcrd for one XML system and print the energy table.
        openmm_system = parmed.openmm.load_topology(
            _load_pdb_topology(), parmed.load_file(xml_file)
        )
        # The topology file is saved before coordinates are attached, the
        # coordinate file after.
        openmm_system.save(prmtop_file, overwrite=True)
        openmm_system.coordinates = parmed.load_file(
            self.system_pdb, structure=True
        ).coordinates
        openmm_system.save(inpcrd_file, overwrite=True)
        parm = parmed.load_file(prmtop_file, inpcrd_file)
        df_energy_xml = _energy_frame(
            parmed.openmm.energy_decomposition_system(
                openmm_system, parmed.load_file(xml_file)
            ),
            "Energy_xml_" + label,
        )
        df_energy_prmtop = _energy_frame(
            parmed.openmm.energy_decomposition_system(
                parm, parm.createSystem()
            ),
            "Energy_prmtop_" + label,
        )
        print(pd.concat([df_energy_xml, df_energy_prmtop], axis=1))

    _save_and_compare(
        self.non_reparameterised_system_xml_file,
        self.prmtop_system_non_params,
        self.inpcrd_system_non_params,
        "non_params",
    )
    _save_and_compare(
        self.reparameterised_intermediate_system_xml_file,
        self.prmtop_system_params,
        self.inpcrd_system_params,
        "params",
    )
def save_amber_params(self):
    """
    Saves amber generated topology files for the ligand.

    Two passes are run, each printing a table that compares the energy
    terms computed from the XML system with those computed from the saved
    topology/coordinate pair:

    1. ``self.non_reparameterised_system_xml_file`` is saved to
       ``self.prmtop_system_non_params`` /
       ``self.inpcrd_system_non_params``.
    2. ``self.reparameterised_system_xml_file`` is saved to
       ``self.prmtop_system_params`` / ``self.inpcrd_system_params``.

    Raises
    ------
    ValueError
        If ``self.load_topology`` is neither "parmed" nor "openmm".
    """
    energy_terms = [
        "HarmonicBondForce",
        "HarmonicAngleForce",
        "PeriodicTorsionForce",
        "NonbondedForce",
    ]

    def _load_pdb_topology():
        # Topology of the system PDB, read by the configured loader.
        if self.load_topology == "parmed":
            return parmed.load_file(self.system_pdb, structure=True).topology
        if self.load_topology == "openmm":
            return simtk.openmm.app.PDBFile(self.system_pdb).topology
        raise ValueError(
            'load_topology must be "parmed" or "openmm", got '
            + repr(self.load_topology)
        )

    def _energy_frame(decomposition, column):
        # Flatten the decomposition entries once, convert them with the
        # module's list_to_dict helper, and tabulate the four energy terms.
        flattened = [item for elem in decomposition for item in list(elem)]
        energy_by_term = list_to_dict(flattened)
        values = [energy_by_term.get(term) for term in energy_terms]
        frame = pd.DataFrame(
            list(zip(energy_terms, values)),
            columns=["Energy_term", column],
        )
        return frame.set_index("Energy_term")

    def _save_and_compare(xml_file, prmtop_file, inpcrd_file, label):
        # Save prmtop/inpcrd for one XML system and print the energy table.
        openmm_system = parmed.openmm.load_topology(
            _load_pdb_topology(), parmed.load_file(xml_file)
        )
        # The topology file is saved before coordinates are attached, the
        # coordinate file after.
        openmm_system.save(prmtop_file, overwrite=True)
        openmm_system.coordinates = parmed.load_file(
            self.system_pdb, structure=True
        ).coordinates
        openmm_system.save(inpcrd_file, overwrite=True)
        parm = parmed.load_file(prmtop_file, inpcrd_file)
        df_energy_xml = _energy_frame(
            parmed.openmm.energy_decomposition_system(
                openmm_system, parmed.load_file(xml_file)
            ),
            "Energy_xml_" + label,
        )
        df_energy_prmtop = _energy_frame(
            parmed.openmm.energy_decomposition_system(
                parm, parm.createSystem()
            ),
            "Energy_prmtop_" + label,
        )
        print(pd.concat([df_energy_xml, df_energy_prmtop], axis=1))

    _save_and_compare(
        self.non_reparameterised_system_xml_file,
        self.prmtop_system_non_params,
        self.inpcrd_system_non_params,
        "non_params",
    )
    _save_and_compare(
        self.reparameterised_system_xml_file,
        self.prmtop_system_params,
        self.inpcrd_system_params,
        "params",
    )
def analyze_diff_energies(self):
    """
    Compares the energies of the ligand obtained from the non-parameterized
    and the parameterized force field files.

    The energy terms are computed from the
    ``self.prmtop_system_non_params`` / ``self.inpcrd_system_non_params``
    pair and from the ``self.prmtop_system_params`` /
    ``self.inpcrd_system_params`` pair. A table with both columns and their
    difference (non-parameterized minus parameterized) is printed.
    """
    energy_terms = [
        "HarmonicBondForce",
        "HarmonicAngleForce",
        "PeriodicTorsionForce",
        "NonbondedForce",
    ]

    def _prmtop_energy_frame(prmtop_file, inpcrd_file, column):
        # Load one topology/coordinate pair and tabulate its energy terms.
        parm = parmed.load_file(prmtop_file, inpcrd_file)
        decomposition = parmed.openmm.energy_decomposition_system(
            parm, parm.createSystem()
        )
        # Flatten the decomposition entries once and convert them with the
        # module's list_to_dict helper.
        flattened = [item for elem in decomposition for item in list(elem)]
        energy_by_term = list_to_dict(flattened)
        values = [energy_by_term.get(term) for term in energy_terms]
        frame = pd.DataFrame(
            list(zip(energy_terms, values)),
            columns=["Energy_term", column],
        )
        return frame.set_index("Energy_term")

    df_energy_non_params = _prmtop_energy_frame(
        self.prmtop_system_non_params,
        self.inpcrd_system_non_params,
        "Energy_parm_non_params",
    )
    df_energy_params = _prmtop_energy_frame(
        self.prmtop_system_params,
        self.inpcrd_system_params,
        "Energy_parm_params",
    )
    df_compare = pd.concat(
        [df_energy_non_params, df_energy_params], axis=1
    )
    df_compare["Energy_difference"] = df_compare[
        "Energy_parm_non_params"
    ].sub(df_compare["Energy_parm_params"], axis=0)
    print(df_compare)
[docs]class HostAmberXMLAmber:
"""
A class used to generate a template force field XML file for the receptor
in order regenerate the reparameterised forcefield XML file.
This class contain methods to generate a template XML force field through
openforcefield. Re-parameterized XML force field files are then
generated from the template files. Different energy components such as
bond, angle, torsional and non-bonded energies are computed for the
non-reparametrized and the reparameterized force fields. Difference
between the non-reparameterized and reparameterized force field energies
can then be analyzed.
...
Attributes
----------
system_pdb: str, optional
Receptor PDB file with atom numbers beginning from 1.
system_sdf: str, optional
Receptor SDF (structure-data) format file.
charge : int
Charge of the ligand.
system_mol2: str, optional
Receptor Mol2 file obtained from PDB file.
system_in: str, optional
Prepi file as required by antechamber.
system_frcmod: str, optional
FRCMOD file as required by antechamber.
prmtop_system : str, optional
Topology file obtained from the receptor PDB.
inpcrd_system : str, optional
Coordinate file obtained from the receptor PDB using the
command saveamberparm.
system_leap : str, optional
Amber generated leap file for generating and saving topology
and coordinate files.
system_xml: str, optional
Serilazed XML force field file of the receptor.
sim_output: str, optional
PDB file containing the trajectory coordinates for the OpenMM
simulation.
sim_steps: str, optional
Number of steps in the OpenMM MD simulation.
charge_parameter_file: str, optional
File containing the charges of receptor atoms and their
corresponding atoms.
system_qm_pdb: str, optional
Receptor QM region's PDB file with atom numbers beginning from 1.
bond_parameter_file: str, optional
Text file containing the bond parameters for the receptor.
angle_parameter_file: str, optional
Text file containing the angle parameters of the receptor.
system_qm_params_file: str, optional
A text file containing the QM obtained parameters for the
receptor.
reparameterised_intermediate_system_xml_file: str, optional
XML force field file with bond and angle parameter lines replaced by
corresponding values obtained from the QM calculations.
system_xml_non_bonded_file: str, optional
Text file to write the NonBondedForce Charge Parameters from
the non-parameterised system XML file.
system_xml_non_bonded_reparams_file: str, optional
Text file containing the non-bonded parameters parsed from the
XML force field file.
reparameterised_system_xml_file: str, optional
Reparameterized force field XML file obtained using
openforcefield.
non_reparameterised_system_xml_file: str, optional
Non-reparameterized force field XML file obtained using
openforcefield.
prmtop_system_non_params: str, optional
Amber generated topology file saved from the non-reparameterized
force field XML file for the receptor.
inpcrd_system_non_params: str, optional
Amber generated coordinate file saved from the non-reparameterized
force field XML file for the receptor.
prmtop_system_params: str, optional
Amber generated topology file saved from the reparameterized
force field XML file for the receptor.
inpcrd_system_params: str, optional
Amber generated coordinate file saved from the reparameterized
force field XML file for the receptor.
load_topology: str, optional
Argument to specify how to load the topology. Can either be "openmm"
or "parmed".
"""
def __init__(
    self,
    system_pdb="host.pdb",
    system_sdf="host.sdf",
    charge=0,
    system_mol2="host.mol2",
    system_in="host.in",
    system_frcmod="host.frcmod",
    prmtop_system="host.prmtop",
    inpcrd_system="host.inpcrd",
    system_leap="host.leap",
    system_xml="host.xml",
    sim_output="sim_output.pdb",
    sim_steps=1000,
    charge_parameter_file="host_qm_surround_charges.txt",
    system_qm_pdb="host_qm.pdb",
    bond_parameter_file="host_qm_bonds.txt",
    angle_parameter_file="host_qm_angles.txt",
    system_qm_params_file="host_qm_params.txt",
    reparameterised_intermediate_system_xml_file="host_intermediate_reparameterised.xml",
    system_xml_non_bonded_file="host_xml_non_bonded.txt",
    system_xml_non_bonded_reparams_file="host_xml_non_bonded_reparams.txt",
    reparameterised_system_xml_file="host_reparameterised.xml",
    non_reparameterised_system_xml_file="host.xml",
    prmtop_system_non_params="host_non_params.prmtop",
    inpcrd_system_non_params="host_non_params.inpcrd",
    prmtop_system_params="host_params.prmtop",
    inpcrd_system_params="host_params.inpcrd",
    load_topology="openmm",
):
    """
    Stores the file names and run settings on the instance.

    Every argument is saved unchanged as an attribute of the same
    name; no file is opened, read or written here.
    """
    # Input structure, its converted formats and the net charge.
    self.system_pdb = system_pdb
    self.system_sdf = system_sdf
    self.charge = charge
    self.system_mol2 = system_mol2
    self.system_in = system_in
    self.system_frcmod = system_frcmod
    # Amber topology / coordinates, the leap script and the serialized XML.
    self.prmtop_system = prmtop_system
    self.inpcrd_system = inpcrd_system
    self.system_leap = system_leap
    self.system_xml = system_xml
    # Settings of the short OpenMM run.
    self.sim_output = sim_output
    self.sim_steps = sim_steps
    # QM-derived parameter files.
    self.charge_parameter_file = charge_parameter_file
    self.system_qm_pdb = system_qm_pdb
    self.bond_parameter_file = bond_parameter_file
    self.angle_parameter_file = angle_parameter_file
    self.system_qm_params_file = system_qm_params_file
    # Intermediate and final files of the XML reparameterisation.
    self.reparameterised_intermediate_system_xml_file = reparameterised_intermediate_system_xml_file
    self.system_xml_non_bonded_file = system_xml_non_bonded_file
    self.system_xml_non_bonded_reparams_file = system_xml_non_bonded_reparams_file
    self.reparameterised_system_xml_file = reparameterised_system_xml_file
    self.non_reparameterised_system_xml_file = non_reparameterised_system_xml_file
    # Amber files saved from the non-reparameterised / reparameterised XML.
    self.prmtop_system_non_params = prmtop_system_non_params
    self.inpcrd_system_non_params = inpcrd_system_non_params
    self.prmtop_system_params = prmtop_system_params
    self.inpcrd_system_params = inpcrd_system_params
    # Topology loader selector ("openmm" or "parmed").
    self.load_topology = load_topology
def generate_xml_from_pdb_sdf(self):
    """
    Generates an XML forcefield file from the SDF file through
    openforcefield.

    The PDB file is converted to SDF with the ``obabel`` executable, an
    OpenMM system is created from that SDF with the
    "openff_unconstrained-1.0.0.offxml" force field, and the serialized
    system is written to ``self.system_xml``.

    Raises
    ------
    subprocess.CalledProcessError
        If ``obabel`` exits with a non-zero status.
    """
    # Argument-list form (no shell): file names are passed verbatim, so
    # paths containing spaces or shell metacharacters cannot break or
    # alter the command.
    sp.run(
        [
            "obabel",
            "-ipdb",
            self.system_pdb,
            "-osdf",
            "-O",
            self.system_sdf,
        ],
        check=True,
    )
    off_molecule = Molecule(self.system_sdf)
    force_field = ForceField("openff_unconstrained-1.0.0.offxml")
    system = force_field.create_openmm_system(off_molecule.to_topology())
    pdbfile = simtk.openmm.app.PDBFile(self.system_pdb)
    # NOTE(review): the returned structure is not used; the call is kept
    # in case it serves as a topology/system consistency check - confirm.
    parmed.openmm.load_topology(
        pdbfile.topology, system, xyz=pdbfile.positions
    )
    with open(self.system_xml, "w") as f:
        f.write(simtk.openmm.XmlSerializer.serialize(system))
def generate_xml_antechamber(self):
    """
    Generates an XML forcefield file from the PDB file through antechamber.

    Runs, in order, ``obabel`` (PDB to mol2), ``antechamber`` (mol2 to
    prepi with bcc charges and the net charge ``self.charge``),
    ``parmchk2`` (frcmod) and ``tleap`` (prmtop / inpcrd), then
    serializes the system created from the prmtop / inpcrd pair to
    ``self.system_xml``.

    Raises
    ------
    subprocess.CalledProcessError
        If any of the external programs exits with a non-zero status.
    """
    # Commands are given as argument lists (no shell) so that file names
    # with spaces or shell metacharacters are passed through unchanged.
    sp.run(
        [
            "obabel",
            "-ipdb",
            self.system_pdb,
            "-omol2",
            "-O",
            self.system_mol2,
        ],
        check=True,
    )
    sp.run(
        [
            "antechamber",
            "-i",
            self.system_mol2,
            "-fi",
            "mol2",
            "-o",
            self.system_in,
            "-fo",
            "prepi",
            "-c",
            "bcc",
            "-nc",
            str(self.charge),
        ],
        check=True,
    )
    sp.run(
        [
            "parmchk2",
            "-i",
            self.system_in,
            "-o",
            self.system_frcmod,
            "-f",
            "prepi",
            "-a",
            "Y",
        ],
        check=True,
    )
    # Constant command with shell globs; removes antechamber by-products
    # from the working directory.
    os.system(
        "rm -rf ANTECHAMBER* leap.log sqm* ATOMTYPE.INF PREP.INF NEWPDB.PDB"
    )
    leap_lines = [
        " ",
        "loadamberprep " + self.system_in,
        "loadamberparams " + self.system_frcmod,
        "pdb = loadpdb " + self.system_pdb,
        "saveamberparm pdb "
        + self.prmtop_system
        + " "
        + self.inpcrd_system,
        "quit",
    ]
    with open(self.system_leap, "w") as f:
        f.write("\n".join(leap_lines) + "\n")
    sp.run(["tleap", "-f", self.system_leap], check=True)
    parm = parmed.load_file(self.prmtop_system, self.inpcrd_system)
    system = parm.createSystem()
    with open(self.system_xml, "w") as f:
        f.write(simtk.openmm.XmlSerializer.serialize(system))
def serialize_system(self):
    """
    Serializes an amber14 OpenMM system built from the PDB file.

    A system is created from ``self.system_pdb`` with "amber14-all.xml",
    energy-minimized (the minimized potential energy is printed) and
    propagated for ``self.sim_steps`` Langevin steps. The trajectory
    file ``self.sim_output`` is deleted afterwards and the system is
    written to ``self.system_xml``.
    """
    pdb = simtk.openmm.app.PDBFile(self.system_pdb)
    forcefield = simtk.openmm.app.ForceField("amber14-all.xml")
    system = forcefield.createSystem(pdb.topology)
    integrator = simtk.openmm.LangevinIntegrator(
        300 * simtk.unit.kelvin,
        1 / simtk.unit.picosecond,
        0.002 * simtk.unit.picoseconds,
    )
    simulation = simtk.openmm.app.Simulation(
        pdb.topology, system, integrator
    )
    simulation.context.setPositions(pdb.positions)
    simulation.minimizeEnergy(maxIterations=100000)
    state = simulation.context.getState(getEnergy=True)
    print(state.getPotentialEnergy())
    # Both reporters get the same whole-number interval (the PDB reporter
    # used to receive a float); at least 1 so runs shorter than 10 steps
    # do not produce a zero interval.
    report_interval = max(1, int(self.sim_steps / 10))
    simulation.reporters.append(
        simtk.openmm.app.PDBReporter(self.sim_output, report_interval)
    )
    simulation.reporters.append(
        simtk.openmm.app.StateDataReporter(
            stdout,
            reportInterval=report_interval,
            step=True,
            potentialEnergy=True,
            temperature=True,
        )
    )
    simulation.step(self.sim_steps)
    # The trajectory is not kept; argument-list form keeps the file name
    # away from the shell.
    sp.run(["rm", "-rf", self.sim_output])
    with open(self.system_xml, "w") as f:
        f.write(simtk.openmm.XmlSerializer.serialize(system))
def write_system_params(self):
    """
    Saves the parameters obtained from the QM log files in a text file.

    Charges, bond parameters and angle parameters are read from
    ``charge_parameter_file``, ``bond_parameter_file`` and
    ``angle_parameter_file`` and written to ``system_qm_params_file``
    as ``<Bond .../>``, ``<Angle .../>`` and ``<Particle .../>`` lines,
    each group enclosed by "Begin writing ..." / "Finish writing ..."
    marker lines.

    Raises
    ------
    ValueError
        If ``system_qm_pdb`` contains no ATOM records.
    """
    # Charges from QM files
    df_charges = pd.read_csv(
        self.charge_parameter_file, header=None, delimiter=r"\s+"
    )
    df_charges.columns = ["atom", "charges"]
    qm_charges = [
        round(num, 6) for num in df_charges["charges"].values.tolist()
    ]

    # Atom numbers of the QM region shifted to start from zero. Parsed
    # once and shared by the bond, angle and charge sections.
    ppdb = PandasPdb()
    ppdb.read_pdb(self.system_qm_pdb)
    atom_name_list = [
        i - 1 for i in ppdb.df["ATOM"]["atom_number"].values.tolist()
    ]
    # Hoisted: the smallest index is the same for every bond / angle atom.
    index_offset = min(atom_name_list)

    # Bond Parameters from QM files
    df = pd.read_csv(
        self.bond_parameter_file, header=None, delimiter=r"\s+"
    )
    df.columns = [
        "bond",
        "k_bond",
        "bond_length",
        "bond_1",
        "bond_2",
    ]
    bond_1_list = [
        x - 1 + index_offset for x in df["bond_1"].values.tolist()
    ]
    bond_2_list = [
        x - 1 + index_offset for x in df["bond_2"].values.tolist()
    ]
    # kcal/mol * A^2 to kJ/mol * nm^2
    k_bond_list = [
        round(i * KCAL_MOL_PER_KJ_MOL * ANGSTROMS_PER_NM**2, 10)
        for i in df["k_bond"].values.tolist()
    ]
    # Bond lengths are divided by 10 (Angstrom to nm).
    bond_length_list = [
        round(i / 10.00, 6) for i in df["bond_length"].values.tolist()
    ]

    # Angle Parameters from QM files
    df = pd.read_csv(
        self.angle_parameter_file, header=None, delimiter=r"\s+"
    )
    df.columns = [
        "angle",
        "k_angle",
        "angle_degrees",
        "angle_1",
        "angle_2",
        "angle_3",
    ]
    angle_1_list = [
        x - 1 + index_offset for x in df["angle_1"].values.tolist()
    ]
    angle_2_list = [
        x - 1 + index_offset for x in df["angle_2"].values.tolist()
    ]
    angle_3_list = [
        x - 1 + index_offset for x in df["angle_3"].values.tolist()
    ]
    # kcal/mol * radian^2 to kJ/mol * radian^2
    k_angle_list = [
        round(i * KCAL_MOL_PER_KJ_MOL, 6)
        for i in df["k_angle"].values.tolist()
    ]
    # Degrees to radians.
    angle_list = [
        round((i * math.pi) / 180.00, 6)
        for i in df["angle_degrees"].values.tolist()
    ]

    # The context manager closes the file even if a write fails.
    with open(self.system_qm_params_file, "w") as xml:
        xml.write("Begin writing the Bond Parameters" + "\n")
        for length, k_bond, p1, p2 in zip(
            bond_length_list, k_bond_list, bond_1_list, bond_2_list
        ):
            xml.write(
                f' <Bond d="{length}" k="{k_bond}" p1="{p1}" p2="{p2}"/>\n'
            )
        xml.write("Finish writing the Bond Parameters" + "\n")
        xml.write("Begin writing the Angle Parameters" + "\n")
        for angle, k_angle, p1, p2, p3 in zip(
            angle_list,
            k_angle_list,
            angle_1_list,
            angle_2_list,
            angle_3_list,
        ):
            xml.write(
                f' <Angle a="{angle}" k="{k_angle}" '
                f'p1="{p1}" p2="{p2}" p3="{p3}"/>\n'
            )
        xml.write("Finish writing the Angle Parameters" + "\n")
        xml.write("Begin writing the Charge Parameters" + "\n")
        # Indexed on purpose: a charge without a matching atom must raise
        # IndexError rather than be dropped silently.
        for i, charge in enumerate(qm_charges):
            xml.write(
                f'<Particle q="{charge}" eps="{0.00}" sig="{0.00}" '
                f'atom="{atom_name_list[i]}"/>\n'
            )
        xml.write("Finish writing the Charge Parameters" + "\n")
def write_reparameterised_system_xml(self):
    """
    Writes a reparameterised XML force field file for the receptor.

    Steps:

    1. Bond and angle lines of ``system_xml`` whose atom indices match an
       entry of ``system_qm_params_file`` (in any atom order) are
       replaced by that entry; the result is written to
       ``reparameterised_intermediate_system_xml_file``.
    2. The particle lines located between ``<GlobalParameters/>`` and
       ``<Exceptions>`` in ``system_xml`` are written to
       ``system_xml_non_bonded_file``.
    3. For every atom listed in the charge section the particle line is
       rebuilt with the QM charge while keeping its eps and sig values;
       the result is written to ``system_xml_non_bonded_reparams_file``.
    4. The intermediate XML with the particle lines of step 3 spliced in
       is written to ``reparameterised_system_xml_file``.

    Raises
    ------
    ValueError
        If a section marker is missing from the parameter file, or if
        ``<GlobalParameters/>`` / ``<Exceptions>`` is missing from the XML.
    """

    def section_lines(all_lines, title):
        # Lines strictly between the "Begin/Finish writing the <title>"
        # markers (the last occurrence of each marker wins).
        begin = end = None
        for idx, text in enumerate(all_lines):
            if "Begin writing the " + title in text:
                begin = idx
            if "Finish writing the " + title in text:
                end = idx
        if begin is None or end is None:
            raise ValueError(
                "Section '" + title + "' not found in "
                + str(self.system_qm_params_file)
            )
        return all_lines[begin + 1 : end]

    def particle_bounds(xml_lines):
        # Slice bounds of the particle lines of the non-bonded force.
        # NOTE(review): the +4 / -1 offsets presumably skip the header
        # lines after <GlobalParameters/> and the closing tag before
        # <Exceptions> - confirm against a serialized XML file.
        begin = end = None
        for idx, text in enumerate(xml_lines):
            if "<GlobalParameters/>" in text:
                begin = idx
            if "<Exceptions>" in text:
                end = idx
        if begin is None or end is None:
            raise ValueError(
                "<GlobalParameters/> or <Exceptions> not found in the XML"
            )
        return begin + 4, end - 1

    def find_replacements(param_lines, atom_positions):
        # Pair every parameter line with the XML lines that list the same
        # atoms, whatever the order of p1, p2(, p3).
        found = []
        for param_line in param_lines:
            numbers = re.findall(r"\d*\.?\d+", param_line)
            atoms = [numbers[pos] for pos in atom_positions]
            for ordering in itertools.permutations(atoms):
                pattern = (
                    " ".join(
                        'p{}="{}"'.format(slot, atom)
                        for slot, atom in enumerate(ordering, start=1)
                    )
                    + "/>"
                )
                hits = search_in_file(file=self.system_xml, word=pattern)
                if hits:
                    found.append((hits, param_line))
        return found

    with open(self.system_qm_params_file, "r") as f_params:
        lines_params = f_params.readlines()

    # Bond and Angle Parameters. In a parameter line the numbers at
    # positions 3, 5 (and 7) are the values of p1, p2 (and p3); the ones
    # in between are the digits of the attribute names themselves.
    index_search_replace_bond = find_replacements(
        section_lines(lines_params, "Bond Parameters"), (3, 5)
    )
    index_search_replace_angle = find_replacements(
        section_lines(lines_params, "Angle Parameters"), (3, 5, 7)
    )

    with open(self.system_xml) as f_org:
        xml_off_lines = f_org.readlines()
    lines = list(xml_off_lines)
    for hits, new_line in (
        index_search_replace_bond + index_search_replace_angle
    ):
        # Only the first matching line is replaced; the reported line
        # number is one-based.
        lines[hits[0][0] - 1] = new_line
    with open(
        self.reparameterised_intermediate_system_xml_file, "w"
    ) as f_cop:
        f_cop.writelines(lines)

    # Charge Parameters
    number_pattern = re.compile(r"[-+]?\d*\.\d+|\d+")
    non_bonded_index = []
    charge_for_index = []
    for charge_line in section_lines(lines_params, "Charge Parameters"):
        numbers = number_pattern.findall(charge_line)
        non_bonded_index.append(int(numbers[3]))
        charge_for_index.append(float(numbers[0]))

    start, stop = particle_bounds(xml_off_lines)
    nonbond_params = xml_off_lines[start:stop]
    with open(self.system_xml_non_bonded_file, "w") as f_non_bonded:
        f_non_bonded.writelines(nonbond_params)

    # New particle line per atom index; the first entry wins when an
    # index is listed more than once.
    replacement_for = {}
    for atom_index, charge in zip(non_bonded_index, charge_for_index):
        numbers = number_pattern.findall(nonbond_params[atom_index])
        eps = float(numbers[0])
        sig = float(numbers[2])
        replacement_for.setdefault(
            atom_index,
            ' <Particle eps="{}" q="{}" sig="{}"/>'.format(
                eps, charge, sig
            ),
        )
    lines_non_bonded_reparams = [
        replacement_for[idx] + "\n" if idx in replacement_for else text
        for idx, text in enumerate(nonbond_params)
    ]
    with open(
        self.system_xml_non_bonded_reparams_file, "w"
    ) as f_write_non_bonded_reparams:
        f_write_non_bonded_reparams.writelines(lines_non_bonded_reparams)
    # Sanity check kept from the original output: the number of particle
    # lines must not change.
    print(len(lines_non_bonded_reparams) == len(nonbond_params))

    start, stop = particle_bounds(lines)
    with open(self.reparameterised_system_xml_file, "w") as f_reparams_xml:
        f_reparams_xml.writelines(lines[:start])
        f_reparams_xml.writelines(lines_non_bonded_reparams)
        f_reparams_xml.writelines(lines[stop:])
def save_amber_params(self):
    """
    Saves amber generated topology files for the receptor.

    For the non-reparameterised and then the reparameterised XML file, a
    structure is built from the PDB topology and the XML system, saved
    as prmtop and inpcrd files, and a table comparing the bond, angle,
    torsion and non-bonded energies of the XML system with those of the
    saved prmtop / inpcrd pair is printed.

    Raises
    ------
    ValueError
        If ``self.load_topology`` is neither "parmed" nor "openmm".
    """
    energy_terms = [
        "HarmonicBondForce",
        "HarmonicAngleForce",
        "PeriodicTorsionForce",
        "NonbondedForce",
    ]

    def load_structure(xml_file):
        # Structure combining the PDB topology with the XML system.
        if self.load_topology == "parmed":
            topology = parmed.load_file(
                self.system_pdb, structure=True
            ).topology
        elif self.load_topology == "openmm":
            topology = simtk.openmm.app.PDBFile(self.system_pdb).topology
        else:
            raise ValueError(
                'load_topology must be "openmm" or "parmed", got '
                + repr(self.load_topology)
            )
        return parmed.openmm.load_topology(
            topology, parmed.load_file(xml_file)
        )

    def energy_frame(decomposition, column):
        # One-column table of the selected energy terms, indexed by term.
        flat = [item for elem in decomposition for item in list(elem)]
        energies = list_to_dict(flat)
        values = [energies.get(term) for term in energy_terms]
        frame = pd.DataFrame(
            list(zip(energy_terms, values)),
            columns=["Energy_term", column],
        )
        return frame.set_index("Energy_term")

    def save_and_compare(xml_file, prmtop_file, inpcrd_file, suffix):
        # Save prmtop / inpcrd for one XML file and print the comparison.
        openmm_system = load_structure(xml_file)
        openmm_system.save(prmtop_file, overwrite=True)
        openmm_system.coordinates = parmed.load_file(
            self.system_pdb, structure=True
        ).coordinates
        openmm_system.save(inpcrd_file, overwrite=True)
        parm = parmed.load_file(prmtop_file, inpcrd_file)
        xml_energy_decomposition = parmed.openmm.energy_decomposition_system(
            openmm_system, parmed.load_file(xml_file)
        )
        prmtop_energy_decomposition = parmed.openmm.energy_decomposition_system(
            parm, parm.createSystem()
        )
        df_compare = pd.concat(
            [
                energy_frame(
                    xml_energy_decomposition, "Energy_xml_" + suffix
                ),
                energy_frame(
                    prmtop_energy_decomposition, "Energy_prmtop_" + suffix
                ),
            ],
            axis=1,
        )
        print(df_compare)

    save_and_compare(
        self.non_reparameterised_system_xml_file,
        self.prmtop_system_non_params,
        self.inpcrd_system_non_params,
        "non_params",
    )
    save_and_compare(
        self.reparameterised_system_xml_file,
        self.prmtop_system_params,
        self.inpcrd_system_params,
        "params",
    )
def analyze_diff_energies(self):
    """
    Compares the energies of the receptor obtained from the
    non-parameterized and the parameterized force field files.

    The bond, angle, torsion and non-bonded energies are computed from
    the two prmtop / inpcrd pairs and printed side by side together
    with their difference (non-parameterized minus parameterized).
    """
    energy_terms = [
        "HarmonicBondForce",
        "HarmonicAngleForce",
        "PeriodicTorsionForce",
        "NonbondedForce",
    ]

    def energy_frame(prmtop_file, inpcrd_file, column):
        # One-column table of the selected energy terms, indexed by term.
        parm = parmed.load_file(prmtop_file, inpcrd_file)
        decomposition = parmed.openmm.energy_decomposition_system(
            parm, parm.createSystem()
        )
        flat = [item for elem in decomposition for item in list(elem)]
        energies = list_to_dict(flat)
        values = [energies.get(term) for term in energy_terms]
        frame = pd.DataFrame(
            list(zip(energy_terms, values)),
            columns=["Energy_term", column],
        )
        return frame.set_index("Energy_term")

    df_energy_non_params = energy_frame(
        self.prmtop_system_non_params,
        self.inpcrd_system_non_params,
        "Energy_parm_non_params",
    )
    df_energy_params = energy_frame(
        self.prmtop_system_params,
        self.inpcrd_system_params,
        "Energy_parm_params",
    )
    df_compare = pd.concat(
        [df_energy_non_params, df_energy_params], axis=1
    )
    df_compare["Energy_difference"] = df_compare[
        "Energy_parm_non_params"
    ].sub(df_compare["Energy_parm_params"], axis=0)
    print(df_compare)
class RunOpenMMSims:
    """
    A class used to run the OpenMM simulation on any specified system.
    This class contain methods to run a MD simulation to confirm the
    proper structure of the reparameterized forcefield files.
    ...
    Attributes
    ----------
    system_prmtop : str
        Topology file of the system (receptor, ligand or
        receptor - ligand complex)
    system_inpcrd : str
        Coordinate file of the system (receptor, ligand or
        receptor - ligand complex)
    system_pdb: str
        PDB file of the system to run MD simulation (receptor,
        ligand or receptor - ligand complex).
    system_xml: str
        Serialised XML file for the system.
    system_output: str, optional
        PDB file containing the trajectory coordinates for the OpenMM
        simulation. It is deleted at the end of every run.
    sim_steps: int, optional
        Number of steps in the OpenMM MD simulation.
    """

    def __init__(
        self,
        system_prmtop,
        system_inpcrd,
        system_pdb,
        system_xml,
        system_output="sim_output.pdb",
        sim_steps=1000,
    ):
        self.system_prmtop = system_prmtop
        self.system_inpcrd = system_inpcrd
        self.system_pdb = system_pdb
        self.system_xml = system_xml
        self.system_output = system_output
        self.sim_steps = sim_steps

    def _minimize_and_run(self, topology, system, positions, box_vectors=None):
        """
        Minimizes the system and runs ``self.sim_steps`` Langevin steps.

        Shared by the three public ``run_openmm_*`` methods. The
        trajectory written to ``self.system_output`` is removed once the
        run has finished.
        """
        integrator = simtk.openmm.LangevinIntegrator(
            300 * simtk.unit.kelvin,
            1 / simtk.unit.picosecond,
            0.002 * simtk.unit.picoseconds,
        )
        simulation = simtk.openmm.app.Simulation(topology, system, integrator)
        simulation.context.setPositions(positions)
        if box_vectors is not None:
            simulation.context.setPeriodicBoxVectors(*box_vectors)
        simulation.minimizeEnergy(maxIterations=100000)
        # Same whole-number interval for both reporters (the PDB reporter
        # used to receive a float); at least 1 so runs shorter than 10
        # steps do not produce a zero interval.
        report_interval = max(1, int(self.sim_steps / 10))
        simulation.reporters.append(
            simtk.openmm.app.PDBReporter(self.system_output, report_interval)
        )
        simulation.reporters.append(
            simtk.openmm.app.StateDataReporter(
                stdout,
                reportInterval=report_interval,
                step=True,
                potentialEnergy=True,
                temperature=True,
            )
        )
        simulation.step(self.sim_steps)
        # Argument-list form keeps the file name away from the shell.
        sp.run(["rm", "-rf", self.system_output])

    def run_openmm_prmtop_inpcrd(self):
        """
        Runs OpenMM MD simulation with prmtop and inpcrd file.
        """
        print(
            "Running OpenMM simulation for "
            + self.system_prmtop
            + " and "
            + self.system_inpcrd
        )
        prmtop = simtk.openmm.app.AmberPrmtopFile(self.system_prmtop)
        inpcrd = simtk.openmm.app.AmberInpcrdFile(self.system_inpcrd)
        system = prmtop.createSystem()
        self._minimize_and_run(
            prmtop.topology,
            system,
            inpcrd.positions,
            box_vectors=inpcrd.boxVectors,
        )

    def run_openmm_prmtop_pdb(self):
        """
        Runs OpenMM MD simulation with prmtop and PDB file.
        """
        print(
            "Running OpenMM simulation for "
            + self.system_prmtop
            + " and "
            + self.system_pdb
        )
        pdb = simtk.openmm.app.PDBFile(self.system_pdb)
        prmtop = simtk.openmm.app.AmberPrmtopFile(self.system_prmtop)
        system = prmtop.createSystem()
        self._minimize_and_run(prmtop.topology, system, pdb.positions)

    def run_openmm_xml_pdb(self):
        """
        Runs OpenMM MD simulation with XML and PDB file.
        """
        print(
            "Running OpenMM simulation for "
            + self.system_xml
            + " and "
            + self.system_pdb
        )
        pdb = simtk.openmm.app.PDBFile(self.system_pdb)
        # The context manager closes the XML file once it has been read.
        with open(self.system_xml, "r") as ff_xml_file:
            system = simtk.openmm.XmlSerializer.deserialize(
                ff_xml_file.read()
            )
        self._minimize_and_run(pdb.topology, system, pdb.positions)
class MergeHostGuestTopology:
    """
    Combines the receptor (host) and ligand (guest) Amber files into a
    single topology / coordinate pair for the complex.
    ...
    Attributes
    ----------
    host_prmtop : str
        Topology file of the receptor.
    guest_prmtop : str
        Topology file of the ligand.
    host_inpcrd : str
        Coordinate file of the receptor.
    guest_inpcrd : str
        Coordinate file of the ligand.
    system_prmtop : str
        Topology file of the receptor - ligand complex.
    system_inpcrd : str
        Coordinate file of the receptor - ligand complex.
    """

    def __init__(
        self,
        host_prmtop,
        guest_prmtop,
        host_inpcrd,
        guest_inpcrd,
        system_prmtop,
        system_inpcrd,
    ):
        # Inputs: topologies first, then coordinates.
        self.host_prmtop, self.guest_prmtop = host_prmtop, guest_prmtop
        self.host_inpcrd, self.guest_inpcrd = host_inpcrd, guest_inpcrd
        # Outputs for the merged complex.
        self.system_prmtop, self.system_inpcrd = system_prmtop, system_inpcrd

    def merge_topology_files(self):
        """
        Loads the host and the guest, adds the two structures and saves
        the sum as the complex topology and coordinate files.
        """
        file_pairs = (
            (self.host_prmtop, self.guest_prmtop),
            (self.host_inpcrd, self.guest_inpcrd),
        )
        for host_file, guest_file in file_pairs:
            print(
                "".join(
                    ["Merging the ", host_file, " ", guest_file, " files"]
                )
            )
        host_system = parmed.load_file(self.host_prmtop, xyz=self.host_inpcrd)
        guest_system = parmed.load_file(
            self.guest_prmtop, xyz=self.guest_inpcrd
        )
        merged = host_system + guest_system
        # Existing output files are overwritten.
        for target in (self.system_prmtop, self.system_inpcrd):
            merged.save(target, overwrite=True)
[docs]class TorsionDriveSims:
"""
A class used to create a filetree for torsion scan
using torsiondrive for the dihedral angles of the ligand.
This class creates a directory for carrying out torsiondrive
calculations followed by fitting of torsional parameters. Methods
in this class are used to run torsiondrive calculations either for
all of the torsional angles, or for non-hydrogen / heavy atoms
contributing to the torsional angle.
...
Attributes
----------
charge : int, optional
Charge of the ligand.
multiplicity: int, optional
Spin Multiplicity (2S+1) of the ligand where S represents
the total spin of the ligand.
reparameterised_system_xml_file : str, optional
Reparamaterixed XML force field for the ligand.
torsion_xml_file : str, optional
A text file containing torsional parameters from
reparameterised XML file.
xyz_file : str, optional
XYZ file containing the coordinates of the guest molecule.
psi_input_file : str, optional
Input file for psi4 QM engine.
memory : int, optional
Memory (in GB) to be used.
basis_set: str, optional
Basis set to use for the QM engine.
functional: str, optional
Exchange/Correlation or hybrid Functional for the QM engine.
iterations : int, optional
Maximum number of geometry optimization steps.
method_torsion_drive : str, optional
The algorithm/package to use while running the torsiondrive
scan. Using --native_opt uses QM program native constrained
optimization algorithm and turns off geomeTRIC package.
system_bonds_file : str, optional
Text file containing bond parameters for the ligand.
tor_dir : str, optional
Torsiondrive directory containing separate torsiondrive
folders, each containing files for a separate torsiondrive
calculation for a particular dihedral angle.
dihedral_text_file : str, optional
Dihedral information file for torsiondrive.
template_pdb : str, optional
Guest PDB with atoms beginning from 1 to be used as a
template PDB to retrieve atom indices and symbols.
torsion_drive_run_file : str, optional
bash file for torsiondrive calculations.
dihedral_interval : int, optional
Grid spacing for dihedral scan, i.e. every n degrees
(where n is an integer), multiple values will be mapped
to each dihedral angle.
engine : str, optional
Engine for running torsiondrive scan.
energy_threshold : float, optional
Only activate grid points if the new optimization is lower than
the previous lowest energy (in a.u.).
"""
[docs] def __init__(
self,
charge=0,
multiplicity=1,
reparameterised_system_xml_file="guest_reparameterised.xml",
torsion_xml_file="guest_torsion_xml.txt",
xyz_file="guest_coords.xyz",
psi_input_file="torsion_drive_input.dat",
memory=50,
basis_set="6-31G",
functional="B3LYP",
iterations=2000,
method_torsion_drive="native_opt",
system_bonds_file="guest_bonds.txt",
tor_dir="torsion_dir",
dihedral_text_file="dihedrals.txt",
template_pdb="guest_init_II.pdb",
torsion_drive_run_file="run_command",
dihedral_interval=15,
engine="psi4",
energy_threshold=0.00001,
):
self.charge = charge
self.multiplicity = multiplicity
self.reparameterised_system_xml_file = reparameterised_system_xml_file
self.torsion_xml_file = torsion_xml_file
self.xyz_file = xyz_file
self.psi_input_file = psi_input_file
self.memory = memory
self.basis_set = basis_set
self.functional = functional
self.iterations = iterations
self.method_torsion_drive = method_torsion_drive
self.system_bonds_file = system_bonds_file
self.tor_dir = tor_dir
self.dihedral_text_file = dihedral_text_file
self.template_pdb = template_pdb
self.torsion_drive_run_file = torsion_drive_run_file
self.dihedral_interval = dihedral_interval
self.engine = engine
self.energy_threshold = energy_threshold
[docs] def write_torsion_drive_run_file(self):
"""
Saves a bash file for running torsion scans for torsiondrive.
"""
if self.method_torsion_drive == "geometric":
torsion_command = (
"torsiondrive-launch"
+ " "
+ self.psi_input_file
+ " "
+ self.dihedral_text_file
+ " "
+ "-g"
+ " "
+ str(self.dihedral_interval)
+ " "
+ "-e"
+ " "
+ self.engine
+ " "
+ "--energy_thresh"
+ " "
+ str(self.energy_threshold)
+ " "
+ "-v"
)
if self.method_torsion_drive == "native_opt":
torsion_command = (
"torsiondrive-launch"
+ " "
+ self.psi_input_file
+ " "
+ self.dihedral_text_file
+ " "
+ "-g"
+ " "
+ str(self.dihedral_interval)
+ " "
+ "-e"
+ " "
+ self.engine
+ " "
+ "--energy_thresh"
+ " "
+ str(self.energy_threshold)
+ " "
+ "--"
+ self.method_torsion_drive
+ " "
+ "-v"
)
print(torsion_command)
with open(self.torsion_drive_run_file, "w") as f:
f.write(torsion_command)
[docs] def write_tor_params_txt(self):
"""
Saves a text file containing torsional parameters from the reparameterized XML
force field file.
"""
xml_off = open(self.reparameterised_system_xml_file, "r")
xml_off_lines = xml_off.readlines()
for i in range(len(xml_off_lines)):
if "<Torsions>" in xml_off_lines[i]:
to_begin = int(i)
if "</Torsions>" in xml_off_lines[i]:
to_end = int(i)
torsion_params = xml_off_lines[to_begin + 1 : to_end]
k_list_off = []
for i in range(len(torsion_params)):
k_list_off.append(
float(re.findall("\d*\.?\d+", torsion_params[i])[0])
)
k_list_off = [round(num, 10) for num in k_list_off]
# print(k_list_off)
p1 = []
for i in range(len(torsion_params)):
p1.append(int(re.findall("\d*\.?\d+", torsion_params[i])[2]))
p1 = [i + 1 for i in p1]
# print(p1)
p2 = []
for i in range(len(torsion_params)):
p2.append(int(re.findall("\d*\.?\d+", torsion_params[i])[4]))
p2 = [i + 1 for i in p2]
# print(p2)
p3 = []
for i in range(len(torsion_params)):
p3.append(int(re.findall("\d*\.?\d+", torsion_params[i])[6]))
p3 = [i + 1 for i in p3]
# print(p3)
p4 = []
for i in range(len(torsion_params)):
p4.append(int(re.findall("\d*\.?\d+", torsion_params[i])[8]))
p4 = [i + 1 for i in p4]
# print(p4)
periodicity = []
for i in range(len(torsion_params)):
periodicity.append(
int(re.findall("\d*\.?\d+", torsion_params[i])[9])
)
# print(periodicity)
phase = []
for i in range(len(torsion_params)):
phase.append(float(re.findall("\d*\.?\d+", torsion_params[i])[10]))
phase = [round(num, 8) for num in phase]
# print(phase)
data_tuples = list(zip(k_list_off, p1, p2, p3, p4, periodicity, phase))
df_tor = pd.DataFrame(
data_tuples,
columns=["k", "p1", "p2", "p3", "p4", "periodicity", "phase",],
)
# print(df_tor.head())
df_tor.to_csv(
self.torsion_xml_file, index=False, header=False, sep=" "
)
[docs] def create_torsion_drive_dir(self):
"""
Creates a directory for carrying out torsiondrive
calculations for all the proper dihedral angles.
"""
df_tor = pd.read_csv(
self.torsion_xml_file, header=None, delimiter=r"\s+"
)
df_tor.columns = [
"k",
"p1",
"p2",
"p3",
"p4",
"periodicity",
"phase",
]
# print(df_tor.head())
df_dihedrals = df_tor[["p1", "p2", "p3", "p4"]]
# print(df_dihedrals.head())
dihedrals_list_list = []
for i in range(len(df_dihedrals)):
dihedrals_list_list.append(df_dihedrals.iloc[i].values.tolist())
set_list = set()
unique_dihedrals_list_list = []
for x in dihedrals_list_list:
srtd = tuple(sorted(x))
if srtd not in set_list:
unique_dihedrals_list_list.append(x)
set_list.add(srtd)
# print(unique_dihedrals_list_list)
os.system("rm -rf " + self.tor_dir)
os.system("mkdir " + self.tor_dir)
parent_cwd = os.getcwd()
shutil.copy(
parent_cwd + "/" + self.psi_input_file,
parent_cwd + "/" + self.tor_dir + "/" + self.psi_input_file,
)
shutil.copy(
parent_cwd + "/" + self.template_pdb,
parent_cwd + "/" + self.tor_dir + "/" + self.template_pdb,
)
shutil.copy(
parent_cwd + "/" + self.torsion_drive_run_file,
parent_cwd
+ "/"
+ self.tor_dir
+ "/"
+ self.torsion_drive_run_file,
)
os.chdir(parent_cwd + "/" + self.tor_dir)
torsion_drive_dir = os.getcwd()
for i in range(len(unique_dihedrals_list_list)):
dir_name = "torsion_drive" + "_" + str(i)
os.system("rm -rf " + dir_name)
os.system("mkdir " + dir_name)
os.chdir(torsion_drive_dir + "/" + dir_name)
with open(self.dihedral_text_file, "w") as f:
f.write(
"# dihedral definition by atom indices starting from 1"
+ "\n"
)
f.write("# i j k l" + "\n")
i_ = unique_dihedrals_list_list[i][0]
j_ = unique_dihedrals_list_list[i][1]
k_ = unique_dihedrals_list_list[i][2]
l_ = unique_dihedrals_list_list[i][3]
f.write(
" "
+ "{:< 6d}".format(i_)
+ "{:< 6d}".format(j_)
+ "{:< 6d}".format(k_)
+ "{:< 6d}".format(l_)
+ "\n"
)
shutil.copy(
torsion_drive_dir + "/" + self.psi_input_file,
torsion_drive_dir
+ "/"
+ dir_name
+ "/"
+ self.psi_input_file,
)
shutil.copy(
torsion_drive_dir + "/" + self.template_pdb,
torsion_drive_dir
+ "/"
+ dir_name
+ "/"
+ self.template_pdb,
)
shutil.copy(
torsion_drive_dir + "/" + self.torsion_drive_run_file,
torsion_drive_dir
+ "/"
+ dir_name
+ "/"
+ self.torsion_drive_run_file,
)
os.chdir(torsion_drive_dir)
os.system("rm -rf " + self.psi_input_file)
os.system("rm -rf " + self.template_pdb)
os.system("rm -rf " + self.torsion_drive_run_file)
os.chdir(parent_cwd)
[docs] def create_non_H_torsion_drive_dir(self):
"""
Creates a directory for carrying out torsiondrive
calculations for all non-hydrogen torsional angles.
"""
df_tor = pd.read_csv(
self.torsion_xml_file, header=None, delimiter=r"\s+"
)
df_tor.columns = [
"k",
"p1",
"p2",
"p3",
"p4",
"periodicity",
"phase",
]
# print(df_tor.head())
ppdb = PandasPdb()
ppdb.read_pdb(self.template_pdb)
df_index_symbol = ppdb.df["ATOM"][["atom_number", "element_symbol"]]
# print(df_index_symbol.head())
df_dihedrals = df_tor[["p1", "p2", "p3", "p4"]]
# print(df_dihedrals.head())
dihedrals_list_list = []
for i in range(len(df_dihedrals)):
dihedrals_list_list.append(df_dihedrals.iloc[i].values.tolist())
set_list = set()
unique_dihedrals_list_list = []
for x in dihedrals_list_list:
srtd = tuple(sorted(x))
if srtd not in set_list:
unique_dihedrals_list_list.append(x)
set_list.add(srtd)
# print(unique_dihedrals_list_list)
atom_dihedral_list = []
for sub_list in unique_dihedrals_list_list:
atom_dihedral_list.append(
[
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[0]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[1]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[2]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[3]
]["element_symbol"].to_list()[0],
]
)
# print(atom_dihedral_list)
index_to_include = []
for i in range(len(atom_dihedral_list)):
if "H" not in atom_dihedral_list[i]:
index_to_include.append(i)
non_H_dihedrals = []
for i in index_to_include:
non_H_dihedrals.append(unique_dihedrals_list_list[i])
# print(non_H_dihedrals)
non_H_atom_dihedral_list = []
for sub_list in non_H_dihedrals:
non_H_atom_dihedral_list.append(
[
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[0]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[1]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[2]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[3]
]["element_symbol"].to_list()[0],
]
)
print(non_H_atom_dihedral_list)
os.system("rm -rf " + self.tor_dir)
os.system("mkdir " + self.tor_dir)
parent_cwd = os.getcwd()
shutil.copy(
parent_cwd + "/" + self.psi_input_file,
parent_cwd + "/" + self.tor_dir + "/" + self.psi_input_file,
)
shutil.copy(
parent_cwd + "/" + self.template_pdb,
parent_cwd + "/" + self.tor_dir + "/" + self.template_pdb,
)
shutil.copy(
parent_cwd + "/" + self.torsion_drive_run_file,
parent_cwd
+ "/"
+ self.tor_dir
+ "/"
+ self.torsion_drive_run_file,
)
os.chdir(parent_cwd + "/" + self.tor_dir)
torsion_drive_dir = os.getcwd()
for i in range(len(non_H_dihedrals)):
dir_name = "torsion_drive" + "_" + str(i)
os.system("rm -rf " + dir_name)
os.system("mkdir " + dir_name)
os.chdir(torsion_drive_dir + "/" + dir_name)
with open(self.dihedral_text_file, "w") as f:
f.write(
"# dihedral definition by atom indices starting from 1"
+ "\n"
)
f.write("# i j k l" + "\n")
i_ = non_H_dihedrals[i][0]
j_ = non_H_dihedrals[i][1]
k_ = non_H_dihedrals[i][2]
l_ = non_H_dihedrals[i][3]
f.write(
" "
+ "{:< 6d}".format(i_)
+ "{:< 6d}".format(j_)
+ "{:< 6d}".format(k_)
+ "{:< 6d}".format(l_)
+ "\n"
)
shutil.copy(
torsion_drive_dir + "/" + self.psi_input_file,
torsion_drive_dir
+ "/"
+ dir_name
+ "/"
+ self.psi_input_file,
)
shutil.copy(
torsion_drive_dir + "/" + self.template_pdb,
torsion_drive_dir
+ "/"
+ dir_name
+ "/"
+ self.template_pdb,
)
shutil.copy(
torsion_drive_dir + "/" + self.torsion_drive_run_file,
torsion_drive_dir
+ "/"
+ dir_name
+ "/"
+ self.torsion_drive_run_file,
)
os.chdir(torsion_drive_dir)
os.system("rm -rf " + self.psi_input_file)
os.system("rm -rf " + self.template_pdb)
os.system("rm -rf " + self.torsion_drive_run_file)
os.chdir(parent_cwd)
[docs] def create_non_H_bonded_torsion_drive_dir(self):
"""
Creates a directory for carrying out torsiondrive
calculations for all non-hydrogen bonded torsional angles.
"""
df_tor = pd.read_csv(
self.torsion_xml_file, header=None, delimiter=r"\s+"
)
df_tor.columns = [
"k",
"p1",
"p2",
"p3",
"p4",
"periodicity",
"phase",
]
# print(df_tor.head())
ppdb = PandasPdb()
ppdb.read_pdb(self.template_pdb)
df_index_symbol = ppdb.df["ATOM"][["atom_number", "element_symbol"]]
# print(df_index_symbol.head())
df_dihedrals = df_tor[["p1", "p2", "p3", "p4"]]
# print(df_dihedrals.head())
dihedrals_list_list = []
for i in range(len(df_dihedrals)):
dihedrals_list_list.append(df_dihedrals.iloc[i].values.tolist())
set_list = set()
unique_dihedrals_list_list = []
for x in dihedrals_list_list:
srtd = tuple(sorted(x))
if srtd not in set_list:
unique_dihedrals_list_list.append(x)
set_list.add(srtd)
# print(unique_dihedrals_list_list)
atom_dihedral_list = []
for sub_list in unique_dihedrals_list_list:
atom_dihedral_list.append(
[
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[0]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[1]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[2]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[3]
]["element_symbol"].to_list()[0],
]
)
# print(atom_dihedral_list)
index_to_include = []
for i in range(len(atom_dihedral_list)):
if "H" not in atom_dihedral_list[i]:
index_to_include.append(i)
non_H_dihedrals = []
for i in index_to_include:
non_H_dihedrals.append(unique_dihedrals_list_list[i])
# print(non_H_dihedrals)
non_H_atom_dihedral_list = []
for sub_list in non_H_dihedrals:
non_H_atom_dihedral_list.append(
[
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[0]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[1]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[2]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[3]
]["element_symbol"].to_list()[0],
]
)
# print(non_H_atom_dihedral_list)
df_bonds_all = pd.read_csv(
self.system_bonds_file, header=None, delimiter=r"\s+"
)
df_bonds_all.columns = [
"bond_names",
"k",
"angle",
"b1",
"b2",
]
df_bonds = df_bonds_all[["b1", "b2"]]
bonds_list_list = []
for i in range(len(df_bonds)):
bonds_list_list.append(df_bonds.iloc[i].values.tolist())
# print(bonds_list_list)
reverse_bond_list_list = []
for i in bonds_list_list:
reverse_bond_list_list.append(reverse_list(i))
# print(reverse_bond_list_list)
bond_list = bonds_list_list + reverse_bond_list_list
# print(bond_list)
non_H_dihedral_bonds_list = []
for i in non_H_dihedrals:
non_H_dihedral_bonds_list.append(
[[i[0], i[1]], [i[1], i[2]], [i[2], i[3]]]
)
# print(non_H_dihedral_bonds_list)
bonded_index_to_include = []
for i in range(len(non_H_dihedral_bonds_list)):
if [
non_H_dihedral_bonds_list[i][0] in bond_list,
non_H_dihedral_bonds_list[i][1] in bond_list,
non_H_dihedral_bonds_list[i][2] in bond_list,
] == [True, True, True]:
bonded_index_to_include.append(i)
# print(bonded_index_to_include)
non_H_bonded_dihedrals = []
for i in bonded_index_to_include:
non_H_bonded_dihedrals.append(non_H_dihedrals[i])
# print(non_H_bonded_dihedrals)
non_H_bonded_atom_dihedral_list = []
for sub_list in non_H_bonded_dihedrals:
non_H_bonded_atom_dihedral_list.append(
[
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[0]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[1]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[2]
]["element_symbol"].to_list()[0],
df_index_symbol.loc[
df_index_symbol["atom_number"] == sub_list[3]
]["element_symbol"].to_list()[0],
]
)
print(non_H_bonded_atom_dihedral_list)
os.system("rm -rf " + self.tor_dir)
os.system("mkdir " + self.tor_dir)
parent_cwd = os.getcwd()
shutil.copy(
parent_cwd + "/" + self.psi_input_file,
parent_cwd + "/" + self.tor_dir + "/" + self.psi_input_file,
)
shutil.copy(
parent_cwd + "/" + self.template_pdb,
parent_cwd + "/" + self.tor_dir + "/" + self.template_pdb,
)
shutil.copy(
parent_cwd + "/" + self.torsion_drive_run_file,
parent_cwd
+ "/"
+ self.tor_dir
+ "/"
+ self.torsion_drive_run_file,
)
os.chdir(parent_cwd + "/" + self.tor_dir)
torsion_drive_dir = os.getcwd()
for i in range(len(non_H_bonded_dihedrals)):
dir_name = "torsion_drive" + "_" + str(i)
os.system("rm -rf " + dir_name)
os.system("mkdir " + dir_name)
os.chdir(torsion_drive_dir + "/" + dir_name)
with open(self.dihedral_text_file, "w") as f:
f.write(
"# dihedral definition by atom indices starting from 1"
+ "\n"
)
f.write("# i j k l" + "\n")
i_ = non_H_bonded_dihedrals[i][0]
j_ = non_H_bonded_dihedrals[i][1]
k_ = non_H_bonded_dihedrals[i][2]
l_ = non_H_bonded_dihedrals[i][3]
f.write(
" "
+ "{:< 6d}".format(i_)
+ "{:< 6d}".format(j_)
+ "{:< 6d}".format(k_)
+ "{:< 6d}".format(l_)
+ "\n"
)
shutil.copy(
torsion_drive_dir + "/" + self.psi_input_file,
torsion_drive_dir
+ "/"
+ dir_name
+ "/"
+ self.psi_input_file,
)
shutil.copy(
torsion_drive_dir + "/" + self.template_pdb,
torsion_drive_dir
+ "/"
+ dir_name
+ "/"
+ self.template_pdb,
)
shutil.copy(
torsion_drive_dir + "/" + self.torsion_drive_run_file,
torsion_drive_dir
+ "/"
+ dir_name
+ "/"
+ self.torsion_drive_run_file,
)
os.chdir(torsion_drive_dir)
os.system("rm -rf " + self.psi_input_file)
os.system("rm -rf " + self.template_pdb)
os.system("rm -rf " + self.torsion_drive_run_file)
os.chdir(parent_cwd)
[docs] def run_torsion_sim(self):
"""
Run torsion scans using torsiondrive locally.
"""
parent_cwd = os.getcwd()
target_dir = parent_cwd + "/" + self.tor_dir
num_folders = 0
for _, dirnames, filenames in os.walk(target_dir):
num_folders += len(dirnames)
for i in range(num_folders):
dir_ = "torsion_drive" + "_" + str(i)
os.chdir(parent_cwd + "/" + self.tor_dir + "/" + dir_)
run_command = "bash" + " " + self.torsion_drive_run_file
# os.system(run_command)
print(run_command)
os.chdir(parent_cwd)
class TorsionDriveParams:
    """
    A class used to parameterize the torsional parameters
    of the ligand by fitting the torsional parameters obtained
    from torsiondrive calculations.

    Previously obtained reparameterized XML forcefield file did
    not have the torsional parameters obtained from QM calculations.
    The torsional parameters obtained from torsiondrive scans are
    fitted and a new XML forcefield file is generated.

    ...

    Attributes
    ----------
    num_charge_atoms : int, optional
        Number of charged atoms in the molecule.

    index_charge_atom_1: int, optional
        Index of the first charged atom.

    charge_atom_1 : int, optional
        Charge on the first charged atom.

    tor_dir : str, optional
        Torsiondrive directory containing separate torsiondrive folders,
        each containing files for a separate torsiondrive calculation
        for a particular dihedral angle.

    reparameterized_torsional_params_file : str, optional
        Text file that receives the fitted torsion lines, one XML
        torsion element per line.

    psi_input_file : str, optional
        Input file for psi4 QM engine.

    xyz_file : str, optional
        XYZ file for ligand coordinates.

    coords_file : str, optional
        Text file containing the XYZ coordinates of the ligand.

    template_pdb: str, optional
        Ligand PDB with atoms beginning from 1 to be used as a template PDB
        to retrieve atom indices and symbols.

    system_pdb: str, optional
        PDB file for the torsiondrive torsion scans

    system_sdf : str, optional
        Ligand SDF (structure-data) file name handed to
        generate_xml_from_charged_pdb_sdf.

    system_xml : str, optional
        XML force field file for the ligand.

    qm_scan_file : str, optional
        Output scan file for the torsiondrive scans.

    load_topology : str, optional
        Argument to specify how to load the topology. Can either
        be "openmm" or "parmed".

    method : str, optional
        Minimization method for fitting of torsional
        parameters.

    dihedral_text_file : str, optional
        Dihedral information file for torsiondrive.

    system_init_sdf : str, optional
        Ligand SDF (structure-data) format file. This file will be generated
        only if the ligand is charged.

    reparameterised_system_xml_file : str, optional
        Reparameterized force field XML file obtained using
        openforcefield without torsional reparameterization.

    reparameterised_torsional_system_xml_file : str, optional
        XML force field file for the ligand obtained with
        torsional reparameterization.

    """

    # Matches one name="value" attribute of an XML element.
    _XML_ATTRIBUTE = re.compile(r'(\w+)="([^"]*)"')
    # Attributes that identify a torsion term in the force field XML.
    _TORSION_KEY_NAMES = ("p1", "p2", "p3", "p4", "periodicity")
    # Fitted lines carrying this force constant are not written back.
    _ZERO_FORCE_CONSTANT = "k=" + '"' + "0.0" + '"'

    def __init__(
        self,
        # TODO: some of these variables are ints, and should be initialized as ints
        num_charge_atoms="",
        index_charge_atom_1="",
        charge_atom_1="",
        tor_dir="torsion_dir",
        reparameterized_torsional_params_file="reparameterized_torsional_params.txt",
        psi_input_file="torsion_drive_input.dat",
        xyz_file="torsion_drive_input.xyz",
        coords_file="torsion_drive_input.txt",
        template_pdb="guest_init_II.pdb",
        system_pdb="torsion_drive_input.pdb",
        system_sdf="torsion_drive_input.sdf",
        system_xml="torsion_drive_input.xml",
        qm_scan_file="scan.xyz",
        load_topology="openmm",
        method="L-BFGS-B",
        dihedral_text_file="dihedrals.txt",
        system_init_sdf="torsion_drive_input_init.sdf",
        reparameterised_system_xml_file="guest_reparameterised.xml",
        reparameterised_torsional_system_xml_file="guest_torsional_reparameterized.xml",
    ):
        self.num_charge_atoms = num_charge_atoms
        self.index_charge_atom_1 = index_charge_atom_1
        self.charge_atom_1 = charge_atom_1
        self.tor_dir = tor_dir
        self.reparameterized_torsional_params_file = (
            reparameterized_torsional_params_file
        )
        self.psi_input_file = psi_input_file
        self.xyz_file = xyz_file
        self.coords_file = coords_file
        self.template_pdb = template_pdb
        self.system_pdb = system_pdb
        self.system_sdf = system_sdf
        self.system_xml = system_xml
        self.qm_scan_file = qm_scan_file
        self.method = method
        self.dihedral_text_file = dihedral_text_file
        self.system_init_sdf = system_init_sdf
        self.load_topology = load_topology
        self.reparameterised_system_xml_file = reparameterised_system_xml_file
        self.reparameterised_torsional_system_xml_file = (
            reparameterised_torsional_system_xml_file
        )

    def _collect_torsional_lines(self):
        """
        Fit the torsions of every finished scan folder in ``tor_dir``
        and return all resulting torsion lines as one flat list.

        Folders are visited in sorted order so the output is
        reproducible. A folder without ``qm_scan_file`` is reported and
        skipped. The module-level helpers work on relative file names,
        so the working directory is switched into each folder and is
        always restored afterwards.
        """
        torsional_parameters = []
        parent_cwd = os.getcwd()
        target_dir = os.path.join(parent_cwd, self.tor_dir)
        try:
            for entry in sorted(os.listdir(target_dir)):
                scan_dir = os.path.join(target_dir, entry)
                if not os.path.isdir(scan_dir):
                    # Stray files cannot be entered; ignore them.
                    continue
                os.chdir(scan_dir)
                print("Entering directory" + " : " + os.getcwd())
                if not os.path.isfile(self.qm_scan_file):
                    print(
                        "Torsional Scan file not found, optimization may "
                        "not be complete. Skipping!!"
                    )
                    continue
                torsiondrive_input_to_xyz(
                    psi_input_file=self.psi_input_file,
                    xyz_file=self.xyz_file,
                )
                xyz_to_pdb(
                    xyz_file=self.xyz_file,
                    coords_file=self.coords_file,
                    template_pdb=self.template_pdb,
                    system_pdb=self.system_pdb,
                )
                generate_xml_from_charged_pdb_sdf(
                    system_pdb=self.system_pdb,
                    system_init_sdf=self.system_init_sdf,
                    system_sdf=self.system_sdf,
                    num_charge_atoms=self.num_charge_atoms,
                    index_charge_atom_1=self.index_charge_atom_1,
                    charge_atom_1=self.charge_atom_1,
                    system_xml=self.system_xml,
                )
                torsional_lines = get_torsional_lines(
                    template_pdb=self.template_pdb,
                    system_xml=self.system_xml,
                    qm_scan_file=self.qm_scan_file,
                    load_topology=self.load_topology,
                    method=self.method,
                    dihedral_text_file=self.dihedral_text_file,
                )
                torsional_parameters.extend(torsional_lines)
                remove_mm_files(qm_scan_file=self.qm_scan_file)
        finally:
            os.chdir(parent_cwd)
        return torsional_parameters

    def _write_torsional_lines(self, torsional_parameters):
        """Write one torsion line per row to the fitted-parameter file."""
        with open(self.reparameterized_torsional_params_file, "w") as f:
            for torsional_line in torsional_parameters:
                f.write(torsional_line + "\n")

    def write_reparams_torsion_lines(self):
        """
        Saves a text file containing torsional parameters for the ligand
        obtained through openforcefield.
        """
        # NOTE(review): this method and write_reparams_torsion_lines_charged
        # were identical in the original, both using the "charged" XML
        # generator; confirm whether the neutral case should differ.
        self._write_torsional_lines(self._collect_torsional_lines())

    def write_reparams_torsion_lines_charged(self):
        """
        Saves a text file containing torsional parameters for a charged ligand
        obtained through openforcefield.
        """
        self._write_torsional_lines(self._collect_torsional_lines())

    def write_torsional_reparams(self):
        """
        Generates a XML force field file for the ligand with reparameterized
        torsional parameters.

        Fitted lines whose force constant is ``k="0.0"`` are ignored.
        Every other fitted line replaces the line of
        ``reparameterised_system_xml_file`` that carries the same
        ``p1 p2 p3 p4 periodicity`` attributes, and the result is
        written to ``reparameterised_torsional_system_xml_file``.
        Attributes are read by name, so a fitted ``k`` in exponent
        notation no longer shifts the atom indices.
        """
        with open(self.reparameterized_torsional_params_file, "r") as xml_tor:
            non_zero_k_tor = [
                line
                for line in xml_tor
                if self._ZERO_FORCE_CONSTANT not in line
            ]
        # Build every search key once instead of once per XML line.
        replacements = []
        for fitted_line in non_zero_k_tor:
            attributes = dict(self._XML_ATTRIBUTE.findall(fitted_line))
            if not attributes:
                # Blank or attribute-free line: nothing to match.
                continue
            to_find_tor = " ".join(
                name + "=" + '"' + str(int(attributes[name])) + '"'
                for name in self._TORSION_KEY_NAMES
            )
            replacements.append((to_find_tor, fitted_line))
        # TODO: there may be a way to consolidate the reparametrization of
        # the XML file to obey the DRY principle
        with open(self.reparameterised_system_xml_file, "r") as xml_tor_reparams:
            xml_tor_reparams_lines = xml_tor_reparams.readlines()
        for line_number, line in enumerate(xml_tor_reparams_lines):
            # Later fitted lines with the same key override earlier ones.
            for to_find_tor, fitted_line in replacements:
                if to_find_tor in line:
                    line = fitted_line
            xml_tor_reparams_lines[line_number] = line
        with open(self.reparameterised_torsional_system_xml_file, "w") as f:
            f.writelines(xml_tor_reparams_lines)
[docs]class PrepareSolvatedParams:
"""
A class used to integrate the parameterized topology
files of the receptor - ligand complex and the solvent.
This class contains methods to concatenate the solvent (and
ions) and the receptor - ligand complex in a single
parameterized topology file (prmtop and inpcrd).
...
Attributes
----------
init_pdb : str
Initial PDB file containing the receptor-ligand complex with
solvent, ions, etc.
intermediate_pdb : str, optional
An intermediate PDB file formed during pdb4amber processing.
solvent_pdb : str, optional
PDB file containing the water, ions, etc.
solvent_prmtop : str, optional
Solvent topology file.
solvent_inpcrd : str, optional
Solvent coordinate file.
solvent_amber_pdb : str, optional
Solvent PDB file saved from Amber's tleap.
solvent_leap : str, optional
Solvent tleap file for parameterizing the solvent.
system_prmtop : str, optional
Topology file of the receptor - ligand complex.
system_inpcrd : str, optional
Coordinate file of the receptor - ligand complex.
system_output: str, optional
PDB file containing the trajectory coordinates for
the OpenMM simulation.
sim_steps : int, optional
Number of steps in the OpenMM MD simulation.
system_solvent_prmtop : str, optional
Topology file of the receptor - ligand complex and
the solvent.
system_solvent_inpcrd : str, optional
Coordinate file of the receptor - ligand complex and
the solvent.
system_solvent_pdb : str, optional
PDB file of the receptor - ligand complex and
the solvent.
"""
def __init__(
self,
init_pdb,
intermediate_pdb="intermediate.pdb",
solvent_pdb="solvent.pdb",
solvent_prmtop="solvent.prmtop",
solvent_inpcrd="solvent.inpcrd",
solvent_amber_pdb="solvent_amber.pdb",
solvent_leap="solvent.leap",
system_prmtop="system_torsional_params.prmtop",
system_inpcrd="system_torsional_params.inpcrd",
system_output="sim_output.pdb",
sim_steps=1000,
system_solvent_prmtop="system_qmmmrebind.prmtop",
system_solvent_inpcrd="system_qmmmrebind.inpcrd",
system_solvent_pdb="system_qmmmrebind.pdb",
):
self.init_pdb = init_pdb
self.intermediate_pdb = intermediate_pdb
self.solvent_pdb = solvent_pdb
self.solvent_prmtop = solvent_prmtop
self.solvent_inpcrd = solvent_inpcrd
self.solvent_amber_pdb = solvent_amber_pdb
self.solvent_leap = solvent_leap
self.system_prmtop = system_prmtop
self.system_inpcrd = system_inpcrd
self.system_output = system_output
self.sim_steps = sim_steps
self.system_solvent_prmtop = system_solvent_prmtop
self.system_solvent_inpcrd = system_solvent_inpcrd
self.system_solvent_pdb = system_solvent_pdb
[docs] def create_solvent_pdb(self):
"""
Generates a PDB file containing the solvent and the ions.
"""
water_variables = ["HOH", "WAT"]
ions = [
"Na+",
"Cs+",
"K+",
"Li+",
"Rb+",
"Cl-",
"Br-",
"F-",
"I-",
"Ca2",
]
pdb_variables = ["END", "CRYST"]
with open(self.init_pdb) as f1, open(self.intermediate_pdb, "w") as f2:
for line in f1:
if (
any(
water_variable in line
for water_variable in water_variables
)
or any(
pdb_variable in line for pdb_variable in pdb_variables
)
or any(ion in line for ion in ions)
):
f2.write(line)
command = (
"pdb4amber -i " + self.intermediate_pdb + " -o " + self.solvent_pdb
)
os.system(command)
command = (
"rm -rf "
+ self.solvent_pdb[:-4]
+ "_nonprot.pdb "
+ self.solvent_pdb[:-4]
+ "_renum.txt "
+ self.solvent_pdb[:-4]
+ "_sslink"
)
os.system(command)
command = "rm -rf " + self.intermediate_pdb
os.system(command)
[docs] def parameterize_solvent_pdb(self):
"""
Generates a topology file (prmtop) and a coordinate
file (inpcrd) for the solvent system.
"""
line_0 = " "
line_1 = "source leaprc.protein.ff14SB"
line_2 = "source leaprc.water.tip3p"
line_3 = "loadAmberParams frcmod.ionsjc_tip3p"
line_4 = "pdb = loadpdb " + self.solvent_pdb
line_5 = (
"saveamberparm pdb "
+ self.solvent_prmtop
+ " "
+ self.solvent_inpcrd
)
line_6 = "savepdb pdb " + self.solvent_amber_pdb
line_7 = "quit"
with open(self.solvent_leap, "w") as f:
f.write(line_0 + "\n")
f.write(line_1 + "\n")
f.write(line_2 + "\n")
f.write(line_3 + "\n")
f.write(line_4 + "\n")
f.write(line_5 + "\n")
f.write(line_6 + "\n")
f.write(line_7 + "\n")
command = "tleap -f " + self.solvent_leap
os.system(command)
command = "rm -rf leap.log " + self.solvent_leap
os.system(command)
[docs] def run_openmm_solvent_prmtop_inpcrd(self):
"""
Runs OpenMM MD simulation with prmtop and inpcrd file
for the solvent.
"""
print(
"Running OpenMM simulation for "
+ self.solvent_prmtop
+ " and "
+ self.solvent_inpcrd
)
prmtop = simtk.openmm.app.AmberPrmtopFile(self.solvent_prmtop)
inpcrd = simtk.openmm.app.AmberInpcrdFile(self.solvent_inpcrd)
system = prmtop.createSystem()
integrator = simtk.openmm.LangevinIntegrator(
300 * simtk.unit.kelvin,
1 / simtk.unit.picosecond,
0.002 * simtk.unit.picoseconds,
)
simulation = simtk.openmm.app.Simulation(
prmtop.topology, system, integrator
)
simulation.context.setPositions(inpcrd.positions)
if inpcrd.boxVectors is not None:
simulation.context.setPeriodicBoxVectors(*inpcrd.boxVectors)
simulation.minimizeEnergy(maxIterations=100000)
simulation.reporters.append(
simtk.openmm.app.PDBReporter(
self.system_output, self.sim_steps / 10
)
)
simulation.reporters.append(
simtk.openmm.app.StateDataReporter(
stdout,
reportInterval=int(self.sim_steps / 10),
step=True,
potentialEnergy=True,
temperature=True,
)
)
simulation.step(self.sim_steps)
command = "rm -rf " + self.system_output
os.system(command)
[docs] def run_openmm_solvent_prmtop_pdb(self):
"""
Runs OpenMM MD simulation with prmtop and PDB file
for the solvent.
"""
print(
"Running OpenMM simulation for "
+ self.solvent_prmtop
+ " and "
+ self.solvent_amber_pdb
)
pdb = simtk.openmm.app.PDBFile(self.solvent_amber_pdb)
prmtop = simtk.openmm.app.AmberPrmtopFile(self.solvent_prmtop)
system = prmtop.createSystem()
integrator = simtk.openmm.LangevinIntegrator(
300 * simtk.unit.kelvin,
1 / simtk.unit.picosecond,
0.002 * simtk.unit.picoseconds,
)
simulation = simtk.openmm.app.Simulation(
prmtop.topology, system, integrator
)
simulation.context.setPositions(pdb.positions)
simulation.minimizeEnergy(maxIterations=100000)
simulation.reporters.append(
simtk.openmm.app.PDBReporter(
self.system_output, self.sim_steps / 10
)
)
simulation.reporters.append(
simtk.openmm.app.StateDataReporter(
stdout,
reportInterval=int(self.sim_steps / 10),
step=True,
potentialEnergy=True,
temperature=True,
)
)
simulation.step(self.sim_steps)
command = "rm -rf " + self.system_output
os.system(command)
[docs] def merge_topology_files_system_solvent(self):
"""
Merge the system and solvent topology and coordinate
files.
"""
print(
"Merging the "
+ self.system_prmtop
+ " "
+ self.solvent_prmtop
+ " files"
)
print(
"Merging the "
+ self.system_inpcrd
+ " "
+ self.solvent_inpcrd
+ " files"
)
system = parmed.load_file(self.system_prmtop, xyz=self.system_inpcrd)
solvent = parmed.load_file(
self.solvent_prmtop, xyz=self.solvent_inpcrd
)
system_solvent = system + solvent
system_solvent.save(self.system_solvent_prmtop, overwrite=True)
system_solvent.save(self.system_solvent_inpcrd, overwrite=True)
system_solvent.save(self.system_solvent_pdb, overwrite=True)
[docs] def run_openmm_system_solvent_prmtop_inpcrd(self):
"""
Runs OpenMM MD simulation with prmtop and inpcrd file
for the solvent - system complex.
"""
print(
"Running OpenMM simulation for "
+ self.system_solvent_prmtop
+ " and "
+ self.system_solvent_inpcrd
)
prmtop = simtk.openmm.app.AmberPrmtopFile(self.system_solvent_prmtop)
inpcrd = simtk.openmm.app.AmberInpcrdFile(self.system_solvent_inpcrd)
system = prmtop.createSystem()
integrator = simtk.openmm.LangevinIntegrator(
300 * simtk.unit.kelvin,
1 / simtk.unit.picosecond,
0.002 * simtk.unit.picoseconds,
)
simulation = simtk.openmm.app.Simulation(
prmtop.topology, system, integrator
)
simulation.context.setPositions(inpcrd.positions)
if inpcrd.boxVectors is not None:
simulation.context.setPeriodicBoxVectors(*inpcrd.boxVectors)
simulation.minimizeEnergy(maxIterations=100000)
simulation.reporters.append(
simtk.openmm.app.PDBReporter(
self.system_output, self.sim_steps / 10
)
)
simulation.reporters.append(
simtk.openmm.app.StateDataReporter(
stdout,
reportInterval=int(self.sim_steps / 10),
step=True,
potentialEnergy=True,
temperature=True,
)
)
simulation.step(self.sim_steps)
command = "rm -rf " + self.system_output
os.system(command)
[docs] def run_openmm_system_solvent_prmtop_pdb(self):
"""
Runs OpenMM MD simulation with prmtop and PDB file
for the solvent - system complex.
"""
print(
"Running OpenMM simulation for "
+ self.system_solvent_prmtop
+ " and "
+ self.system_solvent_pdb
)
pdb = simtk.openmm.app.PDBFile(self.system_solvent_pdb)
prmtop = simtk.openmm.app.AmberPrmtopFile(self.system_solvent_prmtop)
system = prmtop.createSystem()
integrator = simtk.openmm.LangevinIntegrator(
300 * simtk.unit.kelvin,
1 / simtk.unit.picosecond,
0.002 * simtk.unit.picoseconds,
)
simulation = simtk.openmm.app.Simulation(
prmtop.topology, system, integrator
)
simulation.context.setPositions(pdb.positions)
simulation.minimizeEnergy(maxIterations=100000)
simulation.reporters.append(
simtk.openmm.app.PDBReporter(
self.system_output, self.sim_steps / 10
)
)
simulation.reporters.append(
simtk.openmm.app.StateDataReporter(
stdout,
reportInterval=int(self.sim_steps / 10),
step=True,
potentialEnergy=True,
temperature=True,
)
)
simulation.step(self.sim_steps)
command = "rm -rf " + self.system_output
os.system(command)
[docs]class SystemAmberSystem:
"""
A class used to generate a force field XML file for the system
from the given amber forcefield topology files and
regenerate the reparameterised forcefield XML file.
This class contains methods to generate an XML force field through
parmed if the amber forcefield topology files are given.
Re-parameterized XML force field files are then generated from
these XML forcefield files. Different energy components such as
bond, angle, torsional and non-bonded energies are computed for the
non-reparametrized and the reparameterized force fields. Difference
between the non-reparameterized and reparameterized force field energies
can then be analyzed.
...
Attributes
----------
host_pdb: str, optional
PDB file for the host.
system_pdb: str, optional
PDB file for the system (host, guest and solvent).
prmtop_system: str, optional
Topology file for the system (host, guest and solvent).
system_xml: str, optional
Serialised XML forcefield file generated by parmed.
charge_parameter_file_guest: str, optional
File containing the charges of ligand atoms and their
corresponding atoms.
guest_qm_pdb: str, optional
Ligand PDB file with atom numbers beginning from 1.
bond_parameter_file_guest: str, optional
Text file containing the bond parameters for the ligand.
angle_parameter_file_guest: str, optional
Text file containing the angle parameters of the ligand.
guest_qm_params_file: str, optional
Text file containing QM obtained parameters for the ligand.
charge_parameter_file_host: str, optional
File containing the charges of receptor atoms and their
corresponding atoms.
bond_parameter_file_host: str, optional
Text file containing the bond parameters for the receptor.
host_qm_pdb: str, optional
Receptor QM region's PDB file with atom numbers beginning from 1.
angle_parameter_file_host: str, optional
Text file containing the angle parameters of the receptor.
host_qm_params_file: str, optional
Text file containing QM obtained parameters for the receptor.
host_guest_qm_params_file: str, optional
Text file containing QM obtained parameters for the system.
reparameterised_intermediate_system_xml_file: str, optional
XML force field file with bond and angle parameter lines replaced by
corresponding values obtained from the QM calculations.
system_xml_non_bonded_file: str, optional
Text file to write the NonBondedForce Charge Parameters from
the non-parameterised system XML file.
system_xml_non_bonded_reparams_file: str, optional
Text file containing the non-bonded parameters parsed from the
XML force field file.
reparameterised_system_xml_file: str, optional
Reparameterized force field XML file obtained using
openforcefield.
reparameterized_torsional_params_file : str, optional
Text file containing the forcefield parameters for the
ligand previously obtained without torsional reparameterization.
reparameterised_intermediate_torsional_system_xml_file : str, optional
XML force field file for the system (without the QM charges) obtained
with torsional reparameterization.
reparameterised_torsional_system_xml_file : str, optional
XML force field file for the system obtained with
torsional reparameterization.
load_topology: str, optional
Argument to specify how to load the topology. Can either be "openmm"
or "parmed".
non_reparameterised_system_xml_file: str, optional
Non-reparameterized force field XML file.
prmtop_system_non_params: str, optional
Non-reparameterized topology file.
inpcrd_system_non_params: str, optional
Non-reparameterized INPCRD file.
prmtop_system_intermediate_params: str, optional
Reparameterized topology file but without the QM charges.
inpcrd_system_intermediate_params: str, optional
Reparameterized INPCRD file but without the QM charges.
prmtop_system_params: str, optional
Reparameterized topology file.
inpcrd_system_params: str, optional
Reparameterized INPCRD file.
"""
def __init__(
self,
host_pdb="host.pdb",
system_pdb="",
prmtop_system="hostguest.parm7",
system_xml="hostguest.xml",
charge_parameter_file_guest="guest_qm_surround_charges.txt",
guest_qm_pdb="guest_init_II.pdb",
bond_parameter_file_guest="guest_bonds.txt",
angle_parameter_file_guest="guest_angles.txt",
guest_qm_params_file="guest_qm_params.txt",
charge_parameter_file_host="host_qm_surround_charges.txt",
bond_parameter_file_host="host_qm_bonds.txt",
host_qm_pdb="host_qm.pdb",
angle_parameter_file_host="host_qm_angles.txt",
host_qm_params_file="host_qm_params.txt",
host_guest_qm_params_file="host_guest_qm_params.txt",
reparameterised_intermediate_system_xml_file="hostguest_intermediate.xml",
system_xml_non_bonded_file="hostguest_non_bonded.txt",
system_xml_non_bonded_reparams_file="hostguest_non_bonded_reparams.txt",
reparameterised_system_xml_file="hostguest_reparameterised.xml",
reparameterized_torsional_params_file="reparameterized_torsional_params.txt",
reparameterised_intermediate_torsional_system_xml_file="reparameterized_torsional_params.txt",
reparameterised_torsional_system_xml_file="hostguest_torsional_reparameterised.xml",
load_topology="openmm",
non_reparameterised_system_xml_file="hostguest.xml",
prmtop_system_non_params="hostguest.parm7",
inpcrd_system_non_params="hostguest_non_params.pdb",
prmtop_system_intermediate_params="hostguest_intermediate.prmtop",
inpcrd_system_intermediate_params="hostguest_intermediate.inpcrd",
prmtop_system_params="hostguest_params.prmtop",
inpcrd_system_params="hostguest_params.inpcrd",
):
self.host_pdb = host_pdb
self.system_pdb = system_pdb
self.prmtop_system = prmtop_system
self.system_xml = system_xml
self.charge_parameter_file_guest = charge_parameter_file_guest
self.guest_qm_pdb = guest_qm_pdb
self.bond_parameter_file_guest = bond_parameter_file_guest
self.angle_parameter_file_guest = angle_parameter_file_guest
self.guest_qm_params_file = guest_qm_params_file
self.charge_parameter_file_host = charge_parameter_file_host
self.bond_parameter_file_host = bond_parameter_file_host
self.host_qm_pdb = host_qm_pdb
self.angle_parameter_file_host = angle_parameter_file_host
self.host_qm_params_file = host_qm_params_file
self.host_guest_qm_params_file = host_guest_qm_params_file
self.reparameterised_intermediate_system_xml_file = (
reparameterised_intermediate_system_xml_file
)
self.system_xml_non_bonded_file = system_xml_non_bonded_file
self.system_xml_non_bonded_reparams_file = (
system_xml_non_bonded_reparams_file
)
self.reparameterised_system_xml_file = reparameterised_system_xml_file
self.reparameterized_torsional_params_file = (
reparameterized_torsional_params_file
)
self.reparameterised_intermediate_torsional_system_xml_file = (
reparameterised_intermediate_torsional_system_xml_file
)
self.reparameterised_torsional_system_xml_file = (
reparameterised_torsional_system_xml_file
)
self.load_topology = load_topology
self.non_reparameterised_system_xml_file = (
non_reparameterised_system_xml_file
)
self.prmtop_system_non_params = prmtop_system_non_params
self.inpcrd_system_non_params = inpcrd_system_non_params
self.prmtop_system_intermediate_params = (
prmtop_system_intermediate_params
)
self.inpcrd_system_intermediate_params = (
inpcrd_system_intermediate_params
)
self.prmtop_system_params = prmtop_system_params
self.inpcrd_system_params = inpcrd_system_params
def generate_xml_from_prmtop(self):
    """
    Creates an OpenMM system from the topology file and the PDB file
    loaded through parmed and writes the serialised XML description of
    that system to system_xml.
    """
    omm_system = parmed.load_file(
        self.prmtop_system, self.system_pdb
    ).createSystem()
    with open(self.system_xml, "w") as xml_out:
        serialised = simtk.openmm.XmlSerializer.serialize(omm_system)
        xml_out.write(serialised)
def write_guest_params_non_zero(self):
    """
    Saves the parameters of the ligand obtained from the QM log files
    in a text file starting from non-zero ( indexing begins from the
    index of the last atom of the receptor ).

    The charge, bond and angle files of the ligand are read, the atom
    indices are shifted by the number of receptor atoms (as returned by
    get_num_host_atoms for host_pdb) and the converted values are
    written to guest_qm_params_file in three sections, each enclosed by
    "Begin writing the ... Parameters" and "Finish writing the ...
    Parameters" marker lines.
    """
    # Charges from QM files
    df_charges = pd.read_csv(
        self.charge_parameter_file_guest, header=None, delimiter=r"\s+"
    )
    df_charges.columns = ["atom", "charges"]
    qm_charges = [
        round(charge, 6) for charge in df_charges["charges"].values.tolist()
    ]

    # Atom indices of the ligand: zero-based and placed after the
    # receptor atoms. The PDB is parsed once and shared by all sections.
    ppdb = PandasPdb()
    ppdb.read_pdb(self.guest_qm_pdb)
    no_host_atoms = get_num_host_atoms(self.host_pdb)
    atom_name_list = [
        number - 1 + no_host_atoms
        for number in ppdb.df["ATOM"]["atom_number"].values.tolist()
    ]
    first_atom_index = min(atom_name_list)

    # Bond Parameters from QM files
    df_bonds = pd.read_csv(
        self.bond_parameter_file_guest, header=None, delimiter=r"\s+"
    )
    df_bonds.columns = ["bond", "k_bond", "bond_length", "bond_1", "bond_2"]
    bond_1_list = [
        x - 1 + first_atom_index for x in df_bonds["bond_1"].values.tolist()
    ]
    bond_2_list = [
        x - 1 + first_atom_index for x in df_bonds["bond_2"].values.tolist()
    ]
    # kcal/mol * A^2 to kJ/mol * nm^2
    k_bond_list = [
        round(k * KCAL_MOL_PER_KJ_MOL * ANGSTROMS_PER_NM**2, 10)
        for k in df_bonds["k_bond"].values.tolist()
    ]
    bond_length_list = [
        round(length / 10.00, 6)
        for length in df_bonds["bond_length"].values.tolist()
    ]

    # Angle Parameters from QM files
    df_angles = pd.read_csv(
        self.angle_parameter_file_guest, header=None, delimiter=r"\s+"
    )
    df_angles.columns = [
        "angle",
        "k_angle",
        "angle_degrees",
        "angle_1",
        "angle_2",
        "angle_3",
    ]
    angle_1_list = [
        x - 1 + first_atom_index for x in df_angles["angle_1"].values.tolist()
    ]
    angle_2_list = [
        x - 1 + first_atom_index for x in df_angles["angle_2"].values.tolist()
    ]
    angle_3_list = [
        x - 1 + first_atom_index for x in df_angles["angle_3"].values.tolist()
    ]
    # kcal/mol * radian^2 to kJ/mol * radian^2
    k_angle_list = [
        round(k * KCAL_MOL_PER_KJ_MOL, 6)
        for k in df_angles["k_angle"].values.tolist()
    ]
    # degrees to radians
    angle_list = [
        round((angle * math.pi) / 180.00, 6)
        for angle in df_angles["angle_degrees"].values.tolist()
    ]

    bond_lines = [
        f' <Bond d="{d}" k="{k}" p1="{p1}" p2="{p2}"/>\n'
        for d, k, p1, p2 in zip(
            bond_length_list, k_bond_list, bond_1_list, bond_2_list
        )
    ]
    angle_lines = [
        f' <Angle a="{a}" k="{k}" p1="{p1}" p2="{p2}" p3="{p3}"/>\n'
        for a, k, p1, p2, p3 in zip(
            angle_list, k_angle_list, angle_1_list, angle_2_list, angle_3_list
        )
    ]
    # Indexing (not zip) keeps the IndexError when there are more charges
    # than atoms instead of silently dropping the surplus charges.
    charge_lines = [
        f'<Particle q="{charge}" eps="{0.00}" sig="{0.00}" '
        f'atom="{atom_name_list[i]}"/>\n'
        for i, charge in enumerate(qm_charges)
    ]

    # The context manager closes the file even if a write fails.
    with open(self.guest_qm_params_file, "w") as xml:
        xml.write("Begin writing the Bond Parameters" + "\n")
        xml.writelines(bond_lines)
        xml.write("Finish writing the Bond Parameters" + "\n")
        xml.write("Begin writing the Angle Parameters" + "\n")
        xml.writelines(angle_lines)
        xml.write("Finish writing the Angle Parameters" + "\n")
        xml.write("Begin writing the Charge Parameters" + "\n")
        xml.writelines(charge_lines)
        xml.write("Finish writing the Charge Parameters" + "\n")
def write_host_params(self):
    """
    Saves the parameters obtained from the QM log files of the
    receptor in a text file.

    The charge, bond and angle files of the receptor are read, the atom
    indices are made zero-based and the converted values are written to
    host_qm_params_file in three sections, each enclosed by
    "Begin writing the ... Parameters" and "Finish writing the ...
    Parameters" marker lines.
    """
    # Charges from QM files
    df_charges = pd.read_csv(
        self.charge_parameter_file_host, header=None, delimiter=r"\s+"
    )
    df_charges.columns = ["atom", "charges"]
    qm_charges = [
        round(charge, 6) for charge in df_charges["charges"].values.tolist()
    ]

    # Zero-based atom indices of the receptor QM region. The PDB is
    # parsed once and shared by all sections.
    ppdb = PandasPdb()
    ppdb.read_pdb(self.host_qm_pdb)
    atom_name_list = [
        number - 1
        for number in ppdb.df["ATOM"]["atom_number"].values.tolist()
    ]
    first_atom_index = min(atom_name_list)

    # Bond Parameters from QM files
    df_bonds = pd.read_csv(
        self.bond_parameter_file_host, header=None, delimiter=r"\s+"
    )
    df_bonds.columns = ["bond", "k_bond", "bond_length", "bond_1", "bond_2"]
    bond_1_list = [
        x - 1 + first_atom_index for x in df_bonds["bond_1"].values.tolist()
    ]
    bond_2_list = [
        x - 1 + first_atom_index for x in df_bonds["bond_2"].values.tolist()
    ]
    # kcal/mol * A^2 to kJ/mol * nm^2
    k_bond_list = [
        round(k * KCAL_MOL_PER_KJ_MOL * ANGSTROMS_PER_NM**2, 10)
        for k in df_bonds["k_bond"].values.tolist()
    ]
    bond_length_list = [
        round(length / 10.00, 6)
        for length in df_bonds["bond_length"].values.tolist()
    ]

    # Angle Parameters from QM files
    df_angles = pd.read_csv(
        self.angle_parameter_file_host, header=None, delimiter=r"\s+"
    )
    df_angles.columns = [
        "angle",
        "k_angle",
        "angle_degrees",
        "angle_1",
        "angle_2",
        "angle_3",
    ]
    angle_1_list = [
        x - 1 + first_atom_index for x in df_angles["angle_1"].values.tolist()
    ]
    angle_2_list = [
        x - 1 + first_atom_index for x in df_angles["angle_2"].values.tolist()
    ]
    angle_3_list = [
        x - 1 + first_atom_index for x in df_angles["angle_3"].values.tolist()
    ]
    # kcal/mol * radian^2 to kJ/mol * radian^2
    k_angle_list = [
        round(k * KCAL_MOL_PER_KJ_MOL, 6)
        for k in df_angles["k_angle"].values.tolist()
    ]
    # degrees to radians
    angle_list = [
        round((angle * math.pi) / 180.00, 6)
        for angle in df_angles["angle_degrees"].values.tolist()
    ]

    bond_lines = [
        f' <Bond d="{d}" k="{k}" p1="{p1}" p2="{p2}"/>\n'
        for d, k, p1, p2 in zip(
            bond_length_list, k_bond_list, bond_1_list, bond_2_list
        )
    ]
    angle_lines = [
        f' <Angle a="{a}" k="{k}" p1="{p1}" p2="{p2}" p3="{p3}"/>\n'
        for a, k, p1, p2, p3 in zip(
            angle_list, k_angle_list, angle_1_list, angle_2_list, angle_3_list
        )
    ]
    # Indexing (not zip) keeps the IndexError when there are more charges
    # than atoms instead of silently dropping the surplus charges.
    charge_lines = [
        f'<Particle q="{charge}" eps="{0.00}" sig="{0.00}" '
        f'atom="{atom_name_list[i]}"/>\n'
        for i, charge in enumerate(qm_charges)
    ]

    # The context manager closes the file even if a write fails.
    with open(self.host_qm_params_file, "w") as xml:
        xml.write("Begin writing the Bond Parameters" + "\n")
        xml.writelines(bond_lines)
        xml.write("Finish writing the Bond Parameters" + "\n")
        xml.write("Begin writing the Angle Parameters" + "\n")
        xml.writelines(angle_lines)
        xml.write("Finish writing the Angle Parameters" + "\n")
        xml.write("Begin writing the Charge Parameters" + "\n")
        xml.writelines(charge_lines)
        xml.write("Finish writing the Charge Parameters" + "\n")
def merge_qm_params(self):
    """
    Merges the QM parameter files of the receptor and the ligand into
    a single parameter file for the system.

    For each of the Bond, Angle and Charge sections, the lines found
    between the "Begin writing the ... Parameters" and "Finish writing
    the ... Parameters" markers of host_qm_params_file are followed by
    the corresponding lines of guest_qm_params_file, and the result is
    written, with the same markers, to host_guest_qm_params_file.

    Raises
    ------
    ValueError
        If a section marker is missing from one of the input files.
    """
    section_names = ("Bond", "Angle", "Charge")

    def read_lines(path):
        # Each input is read once and closed immediately.
        with open(path, "r") as handle:
            return handle.readlines()

    def section_lines(lines, name, path):
        # Return the lines strictly between the markers of one section.
        begin_marker = "Begin writing the " + name + " Parameters"
        finish_marker = "Finish writing the " + name + " Parameters"
        to_begin = None
        to_end = None
        for index, line in enumerate(lines):
            if begin_marker in line:
                to_begin = index
            if finish_marker in line:
                to_end = index
        if to_begin is None or to_end is None:
            raise ValueError(
                "Markers of the " + name + " section not found in " + path
            )
        return lines[to_begin + 1 : to_end]

    lines_params_host = read_lines(self.host_qm_params_file)
    lines_params_guest = read_lines(self.guest_qm_params_file)
    # All sections are extracted before the output is opened, so that a
    # malformed input never leaves a truncated output file behind.
    merged_sections = [
        (
            name,
            section_lines(lines_params_host, name, self.host_qm_params_file)
            + section_lines(
                lines_params_guest, name, self.guest_qm_params_file
            ),
        )
        for name in section_names
    ]
    with open(self.host_guest_qm_params_file, "w") as system_params:
        for name, merged_lines in merged_sections:
            system_params.write(
                "Begin writing the " + name + " Parameters" + "\n"
            )
            system_params.writelines(merged_lines)
            system_params.write(
                "Finish writing the " + name + " Parameters" + "\n"
            )
[docs] def write_reparameterised_system_xml(self):
"""
Writes a reparameterised XML force field file for the system.
"""
# Bond Parameters
f_params = open(self.host_guest_qm_params_file, "r")
lines_params = f_params.readlines()
# Bond Parameters
for i in range(len(lines_params)):
if "Begin writing the Bond Parameters" in lines_params[i]:
to_begin = int(i)
if "Finish writing the Bond Parameters" in lines_params[i]:
to_end = int(i)
bond_params = lines_params[to_begin + 1 : to_end]
index_search_replace_bond = []
for i in bond_params:
bond_line_to_replace = i
# print(bond_line_to_replace)
atom_number_list = [
re.findall("\d*\.?\d+", i)[3],
re.findall("\d*\.?\d+", i)[5],
]
# print(atom_number_list)
comb_1 = (
"p1="
+ '"'
+ atom_number_list[0]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[1]
+ '"'
+ "/>"
)
comb_2 = (
"p1="
+ '"'
+ atom_number_list[1]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[0]
+ '"'
+ "/>"
)
comb_list_bond = [comb_1, comb_2]
# print(comb_list_bond)
list_search_bond = [
search_in_file(file=self.system_xml, word=comb_1),
search_in_file(file=self.system_xml, word=comb_2),
]
# print(list_search_bond)
for j in range(len(list_search_bond)):
if list_search_bond[j] != []:
to_add = (list_search_bond[j], i)
# print(to_add)
index_search_replace_bond.append(to_add)
# Angle Parameters
for i in range(len(lines_params)):
if "Begin writing the Angle Parameters" in lines_params[i]:
to_begin = int(i)
if "Finish writing the Angle Parameters" in lines_params[i]:
to_end = int(i)
angle_params = lines_params[to_begin + 1 : to_end]
index_search_replace_angle = []
for i in angle_params:
angle_line_to_replace = i
# print(angle_line_to_replace)
index_search_replace_angle = []
for i in angle_params:
angle_line_to_replace = i
# print(angle_line_to_replace)
atom_number_list = [
re.findall("\d*\.?\d+", i)[3],
re.findall("\d*\.?\d+", i)[5],
re.findall("\d*\.?\d+", i)[7],
]
# print(atom_number_list)
comb_1 = (
"p1="
+ '"'
+ atom_number_list[0]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[1]
+ '"'
+ " "
+ "p3="
+ '"'
+ atom_number_list[2]
+ '"'
+ "/>"
)
comb_2 = (
"p1="
+ '"'
+ atom_number_list[0]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[2]
+ '"'
+ " "
+ "p3="
+ '"'
+ atom_number_list[1]
+ '"'
+ "/>"
)
comb_3 = (
"p1="
+ '"'
+ atom_number_list[1]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[0]
+ '"'
+ " "
+ "p3="
+ '"'
+ atom_number_list[2]
+ '"'
+ "/>"
)
comb_4 = (
"p1="
+ '"'
+ atom_number_list[1]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[2]
+ '"'
+ " "
+ "p3="
+ '"'
+ atom_number_list[0]
+ '"'
+ "/>"
)
comb_5 = (
"p1="
+ '"'
+ atom_number_list[2]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[0]
+ '"'
+ " "
+ "p3="
+ '"'
+ atom_number_list[1]
+ '"'
+ "/>"
)
comb_6 = (
"p1="
+ '"'
+ atom_number_list[2]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[1]
+ '"'
+ " "
+ "p3="
+ '"'
+ atom_number_list[0]
+ '"'
+ "/>"
)
comb_list_angle = [comb_1, comb_2, comb_3, comb_4, comb_5, comb_6]
# print(comb_list_angle)
list_search_angle = [
search_in_file(file=self.system_xml, word=comb_1),
search_in_file(file=self.system_xml, word=comb_2),
search_in_file(file=self.system_xml, word=comb_3),
search_in_file(file=self.system_xml, word=comb_4),
search_in_file(file=self.system_xml, word=comb_5),
search_in_file(file=self.system_xml, word=comb_6),
]
# print(list_search_angle)
for j in range(len(list_search_angle)):
if list_search_angle[j] != []:
to_add = (list_search_angle[j], i)
# print(to_add)
index_search_replace_angle.append(to_add)
f_org = open(self.system_xml)
lines = f_org.readlines()
for i in range(len(index_search_replace_bond)):
line_number = index_search_replace_bond[i][0][0][0] - 1
line_to_replace = index_search_replace_bond[i][0][0][1]
line_to_replace_with = index_search_replace_bond[i][1]
lines[line_number] = line_to_replace_with
for i in range(len(index_search_replace_angle)):
line_number = index_search_replace_angle[i][0][0][0] - 1
line_to_replace = index_search_replace_angle[i][0][0][1]
line_to_replace_with = index_search_replace_angle[i][1]
lines[line_number] = line_to_replace_with
f_cop = open(self.reparameterised_intermediate_system_xml_file, "w")
for i in lines:
f_cop.write(i)
f_cop.close()
f_params = open(self.host_guest_qm_params_file)
lines_params = f_params.readlines()
# Charge Parameters
for i in range(len(lines_params)):
if "Begin writing the Charge Parameters" in lines_params[i]:
to_begin = int(i)
if "Finish writing the Charge Parameters" in lines_params[i]:
to_end = int(i)
charge_params = lines_params[to_begin + 1 : to_end]
non_bonded_index = []
for k in charge_params:
non_bonded_index.append(int(re.findall("[-+]?\d*\.\d+|\d+", k)[3]))
charge_for_index = []
for k in charge_params:
charge_for_index.append(
float(re.findall("[-+]?\d*\.\d+|\d+", k)[0])
)
xml_off = open(self.system_xml)
xml_off_lines = xml_off.readlines()
for i in range(len(xml_off_lines)):
if "<GlobalParameters/>" in xml_off_lines[i]:
to_begin = int(i)
if "<Exceptions>" in xml_off_lines[i]:
to_end = int(i)
nonbond_params = xml_off_lines[to_begin + 4 : to_end - 1]
# print(len(nonbond_params))
f_non_bonded = open(self.system_xml_non_bonded_file, "w")
for x in nonbond_params:
f_non_bonded.write(x)
f_non_bonded = open(self.system_xml_non_bonded_file)
lines_non_bonded = f_non_bonded.readlines()
# print(len(lines_non_bonded))
lines_non_bonded_to_write = []
for i in range(len(non_bonded_index)):
line_ = lines_non_bonded[non_bonded_index[i]]
# print(line_)
eps = float(re.findall("[-+]?\d*\.\d+|\d+", line_)[0])
sig = float(re.findall("[-+]?\d*\.\d+|\d+", line_)[2])
line_to_replace = (
" "
+ "<Particle "
+ "eps="
+ '"'
+ str(eps)
+ '"'
+ " "
+ "q="
+ '"'
+ str(charge_for_index[i])
+ '"'
+ " "
+ "sig="
+ '"'
+ str(sig)
+ '"'
+ "/>"
)
lines_non_bonded_to_write.append(line_to_replace)
data_ = list(zip(non_bonded_index, lines_non_bonded_to_write))
df_non_bonded_params = pd.DataFrame(
data_, columns=["line_index", "line"]
)
# print(df_non_bonded_params.head())
f_non_bonded_ = open(self.system_xml_non_bonded_file)
lines_non_bonded_ = f_non_bonded_.readlines()
for i in range(len(lines_non_bonded_)):
if i in non_bonded_index:
lines_non_bonded_[i] = (
df_non_bonded_params.loc[
df_non_bonded_params.line_index == i, "line"
].values[0]
) + "\n"
# print(len(lines_non_bonded_))
f_write_non_bonded_reparams = open(
self.system_xml_non_bonded_reparams_file, "w"
)
for p in range(len(lines_non_bonded_)):
f_write_non_bonded_reparams.write(lines_non_bonded_[p])
f_write_non_bonded_reparams.close()
f_ = open(self.system_xml_non_bonded_reparams_file)
lines_ = f_.readlines()
print(len(lines_) == len(lines_non_bonded))
xml_off = open(self.reparameterised_intermediate_system_xml_file)
xml_off_lines = xml_off.readlines()
for i in range(len(xml_off_lines)):
if "<GlobalParameters/>" in xml_off_lines[i]:
to_begin = int(i)
if "<Exceptions>" in xml_off_lines[i]:
to_end = int(i)
lines_before_params = xml_off_lines[: to_begin + 4]
f__ = open(self.system_xml_non_bonded_reparams_file)
lines_params_non_bonded = f__.readlines()
lines_after_params = xml_off_lines[to_end - 1 :]
f_reparams_xml = open(self.reparameterised_system_xml_file, "w")
for x in lines_before_params:
f_reparams_xml.write(x)
for x in lines_params_non_bonded:
f_reparams_xml.write(x)
for x in lines_after_params:
f_reparams_xml.write(x)
f_reparams_xml.close()
def write_torsional_reparams(self):
    """
    Generates a XML force field file for the system with reparameterized
    torsional parameters of the ligand.

    The torsion lines in ``self.reparameterized_torsional_params_file``
    have their ``p1`` .. ``p4`` indices shifted by the number of host
    atoms. Every torsion line of ``self.reparameterised_system_xml_file``
    whose ``p1``/``p2``/``p3``/``p4``/``periodicity`` attributes match a
    shifted line with a ``k`` other than ``"0.0"`` is replaced by that
    line, and the result is written to
    ``self.reparameterised_torsional_system_xml_file``.
    """
    no_host_atoms = get_num_host_atoms(self.host_pdb)

    # Attributes are located by name rather than by their position in a
    # list of all numbers on the line, so the numeric format of ``k`` or
    # ``phase`` cannot shift which number is read as an atom index.
    particle_attribute = re.compile(r'\b(p[1-4])="(\d+)"')
    key_attribute = re.compile(r'\b(p[1-4]|periodicity)="(\d+)"')
    key_names = ("p1", "p2", "p3", "p4", "periodicity")
    # Contiguous attribute layout searched for in the system XML lines.
    system_torsion = re.compile(
        r'p1="(\d+)" p2="(\d+)" p3="(\d+)" p4="(\d+)" periodicity="(\d+)"'
    )
    zero_k = 'k="0.0"'

    def shift_index(match):
        # Offset one particle index by the number of host atoms.
        return '{}="{}"'.format(
            match.group(1), int(match.group(2)) + no_host_atoms
        )

    with open(self.reparameterized_torsional_params_file, "r") as xml_tor:
        xml_tor_lines_renum = [
            particle_attribute.sub(shift_index, line) for line in xml_tor
        ]

    # Map (p1, p2, p3, p4, periodicity) -> replacement line. When the same
    # key occurs more than once, the last line wins.
    replacements = {}
    for line in xml_tor_lines_renum:
        if zero_k in line:
            continue
        attributes = dict(key_attribute.findall(line))
        if not all(name in attributes for name in key_names):
            # Blank or non-torsion line: nothing to match against.
            continue
        key = tuple(str(int(attributes[name])) for name in key_names)
        replacements[key] = line

    with open(self.reparameterised_system_xml_file, "r") as xml_tor_reparams:
        xml_tor_reparams_lines = xml_tor_reparams.readlines()

    for index, line in enumerate(xml_tor_reparams_lines):
        match = system_torsion.search(line)
        if match is None:
            continue
        replacement = replacements.get(match.groups())
        if replacement is not None:
            print(line)
            xml_tor_reparams_lines[index] = replacement

    with open(self.reparameterised_torsional_system_xml_file, "w") as f:
        f.writelines(xml_tor_reparams_lines)
def _save_and_compare_amber_params(
    self, xml_file, prmtop_file, inpcrd_file, xml_column, prmtop_column
):
    """
    Saves amber topology/coordinate files for one XML force field file
    and prints an energy comparison between the two representations.

    Parameters
    ----------
    xml_file: str
        Serialised XML force field file to load.
    prmtop_file: str
        Topology file to write.
    inpcrd_file: str
        Coordinate file to write.
    xml_column: str
        Column name for the energies computed from the XML system.
    prmtop_column: str
        Column name for the energies computed from the saved
        topology/coordinate pair.

    Raises
    ------
    ValueError
        If ``self.load_topology`` is neither "parmed" nor "openmm".
    """
    if self.load_topology == "parmed":
        topology = parmed.load_file(self.system_pdb, structure=True).topology
    elif self.load_topology == "openmm":
        topology = simtk.openmm.app.PDBFile(self.system_pdb).topology
    else:
        # Previously this fell through and failed later with an unrelated
        # UnboundLocalError.
        raise ValueError(
            'load_topology must be "parmed" or "openmm", got {!r}'.format(
                self.load_topology
            )
        )
    openmm_system = parmed.openmm.load_topology(
        topology, parmed.load_file(xml_file)
    )
    openmm_system.save(prmtop_file, overwrite=True)
    openmm_system.coordinates = parmed.load_file(
        self.system_pdb, structure=True
    ).coordinates
    openmm_system.save(inpcrd_file, overwrite=True)
    parm = parmed.load_file(prmtop_file, inpcrd_file)

    energy_terms = [
        "HarmonicBondForce",
        "HarmonicAngleForce",
        "PeriodicTorsionForce",
        "NonbondedForce",
    ]

    def energy_frame(decomposition, column):
        # Flatten the decomposition entries into one list and convert it
        # once with list_to_dict, then look up the four energy terms.
        flattened = [item for entry in decomposition for item in entry]
        energy_by_term = list_to_dict(flattened)
        values = [energy_by_term.get(term) for term in energy_terms]
        frame = pd.DataFrame(
            list(zip(energy_terms, values)),
            columns=["Energy_term", column],
        )
        return frame.set_index("Energy_term")

    df_energy_xml = energy_frame(
        parmed.openmm.energy_decomposition_system(
            openmm_system, parmed.load_file(xml_file)
        ),
        xml_column,
    )
    df_energy_prmtop = energy_frame(
        parmed.openmm.energy_decomposition_system(parm, parm.createSystem()),
        prmtop_column,
    )
    df_compare = pd.concat([df_energy_xml, df_energy_prmtop], axis=1)
    print(df_compare)


def save_amber_params_non_qm_charges(self):
    """
    Saves amber generated topology files for the system
    without the QM charges.

    Two topology/coordinate pairs are written and an energy comparison
    table is printed for each: first for
    ``self.non_reparameterised_system_xml_file``, then for
    ``self.reparameterised_intermediate_torsional_system_xml_file``.
    """
    self._save_and_compare_amber_params(
        self.non_reparameterised_system_xml_file,
        self.prmtop_system_non_params,
        self.inpcrd_system_non_params,
        "Energy_xml_non_params",
        "Energy_prmtop_non_params",
    )
    self._save_and_compare_amber_params(
        self.reparameterised_intermediate_torsional_system_xml_file,
        self.prmtop_system_intermediate_params,
        self.inpcrd_system_intermediate_params,
        "Energy_xml_params",
        "Energy_prmtop_params",
    )


def save_amber_params(self):
    """
    Saves amber generated topology files for the system.

    Two topology/coordinate pairs are written and an energy comparison
    table is printed for each: first for
    ``self.non_reparameterised_system_xml_file``, then for
    ``self.reparameterised_torsional_system_xml_file``.
    """
    self._save_and_compare_amber_params(
        self.non_reparameterised_system_xml_file,
        self.prmtop_system_non_params,
        self.inpcrd_system_non_params,
        "Energy_xml_non_params",
        "Energy_prmtop_non_params",
    )
    self._save_and_compare_amber_params(
        self.reparameterised_torsional_system_xml_file,
        self.prmtop_system_params,
        self.inpcrd_system_params,
        "Energy_xml_params",
        "Energy_prmtop_params",
    )
[docs]class SystemGuestAmberSystem:
"""
A class used to generate a force field XML file for the system
from the given amber forcefield topology files and
regenerate the reparameterised forcefield XML file but without
the host QM parameters.
This class contains methods to generate an XML force field through
parmed if the amber forcefield topology files are given.
Re-parameterized XML force field files are then generated from
these XML forcefield files. Different energy components such as
bond, angle, torsional and non-bonded energies are computed for the
non-reparametrized and the reparameterized force fields. Difference
between the non-reparameterized and reparameterized force field energies
can then be analyzed.
...
Attributes
----------
host_pdb: str, optional
PDB file for the host.
system_pdb: str, optional
PDB file for the system (host, guest and solvent).
prmtop_system: str, optional
Topology file for the system (host, guest and solvent).
system_xml: str, optional
Serialised XML forcefield file generated by parmed.
charge_parameter_file_guest: str, optional
Text file containing the QM charges of the ligand (two
whitespace-separated columns: atom and charge).
guest_qm_pdb: str, optional
Ligand PDB file with atom numbers beginning from 1.
bond_parameter_file_guest: str, optional
Text file containing the bond parameters for the ligand.
angle_parameter_file_guest: str, optional
Text file containing the angle parameters of the ligand.
guest_qm_params_file: str, optional
Text file containing QM obtained parameters for the ligand.
reparameterised_intermediate_system_xml_file: str, optional
XML force field file with bond and angle parameter lines replaced by
corresponding values obtained from the QM calculations.
system_xml_non_bonded_file: str, optional
Text file to write the NonBondedForce Charge Parameters from
the non-parameterised system XML file.
system_xml_non_bonded_reparams_file: str, optional
Text file containing the non-bonded parameters parsed from the
XML force field file.
reparameterised_system_xml_file: str, optional
Reparameterized force field XML file obtained using
openforcefield.
reparameterized_torsional_params_file : str, optional
Text file containing the forcefield parameters for the
ligand previously obtained without torsional reparameterization.
reparameterised_intermediate_torsional_system_xml_file : str, optional
XML force field file for the system (without the QM charges) obtained
with torsional reparameterization.
reparameterised_torsional_system_xml_file : str, optional
XML force field file for the system obtained with
torsional reparameterization.
load_topology: str, optional
Argument to specify how to load the topology. Can either be "openmm"
or "parmed".
non_reparameterised_system_xml_file: str, optional
Non-reparameterized force field XML file.
prmtop_system_non_params: str, optional
Non-reparameterized topology file.
inpcrd_system_non_params: str, optional
Non-reparameterized INPCRD file.
prmtop_system_intermediate_params: str, optional
Reparameterized topology file but without the QM charges.
inpcrd_system_intermediate_params: str, optional
Reparameterized INPCRD file but without the QM charges.
prmtop_system_params: str, optional
Reparameterized topology file.
inpcrd_system_params: str, optional
Reparameterized INPCRD file.
"""
def __init__(
self,
host_pdb="host.pdb",
system_pdb="",
prmtop_system="hostguest.parm7",
system_xml="hostguest.xml",
charge_parameter_file_guest="guest_qm_surround_charges.txt",
guest_qm_pdb="guest_init_II.pdb",
bond_parameter_file_guest="guest_bonds.txt",
angle_parameter_file_guest="guest_angles.txt",
guest_qm_params_file="guest_qm_params.txt",
reparameterised_intermediate_system_xml_file="hostguest_intermediate.xml",
system_xml_non_bonded_file="hostguest_non_bonded.txt",
system_xml_non_bonded_reparams_file="hostguest_non_bonded_reparams.txt",
reparameterised_system_xml_file="hostguest_reparameterised.xml",
reparameterized_torsional_params_file="reparameterized_torsional_params.txt",
reparameterised_intermediate_torsional_system_xml_file="reparameterized_torsional_params.txt",
reparameterised_torsional_system_xml_file="hostguest_torsional_reparameterised.xml",
load_topology="openmm",
non_reparameterised_system_xml_file="hostguest.xml",
prmtop_system_non_params="hostguest.parm7",
inpcrd_system_non_params="hostguest_non_params.pdb",
prmtop_system_intermediate_params="hostguest_intermediate.prmtop",
inpcrd_system_intermediate_params="hostguest_intermediate.inpcrd",
prmtop_system_params="hostguest_params.prmtop",
inpcrd_system_params="hostguest_params.inpcrd",
):
self.host_pdb = host_pdb
self.system_pdb = system_pdb
self.prmtop_system = prmtop_system
self.system_xml = system_xml
self.charge_parameter_file_guest = charge_parameter_file_guest
self.guest_qm_pdb = guest_qm_pdb
self.bond_parameter_file_guest = bond_parameter_file_guest
self.angle_parameter_file_guest = angle_parameter_file_guest
self.guest_qm_params_file = guest_qm_params_file
self.reparameterised_intermediate_system_xml_file = (
reparameterised_intermediate_system_xml_file
)
self.system_xml_non_bonded_file = system_xml_non_bonded_file
self.system_xml_non_bonded_reparams_file = (
system_xml_non_bonded_reparams_file
)
self.reparameterised_system_xml_file = reparameterised_system_xml_file
self.reparameterized_torsional_params_file = (
reparameterized_torsional_params_file
)
self.reparameterised_intermediate_torsional_system_xml_file = (
reparameterised_intermediate_torsional_system_xml_file
)
self.reparameterised_torsional_system_xml_file = (
reparameterised_torsional_system_xml_file
)
self.load_topology = load_topology
self.non_reparameterised_system_xml_file = (
non_reparameterised_system_xml_file
)
self.prmtop_system_non_params = prmtop_system_non_params
self.inpcrd_system_non_params = inpcrd_system_non_params
self.prmtop_system_intermediate_params = (
prmtop_system_intermediate_params
)
self.inpcrd_system_intermediate_params = (
inpcrd_system_intermediate_params
)
self.prmtop_system_params = prmtop_system_params
self.inpcrd_system_params = inpcrd_system_params
def generate_xml_from_prmtop(self):
    """
    Loads ``self.prmtop_system`` together with ``self.system_pdb``
    through parmed, creates a system from the loaded structure and
    writes its serialised XML to ``self.system_xml``.
    """
    omm_system = parmed.load_file(
        self.prmtop_system, self.system_pdb
    ).createSystem()
    with open(self.system_xml, "w") as xml_handle:
        serialized_system = simtk.openmm.XmlSerializer.serialize(omm_system)
        xml_handle.write(serialized_system)
def write_guest_params_non_zero(self):
    """
    Saves the parameters of the ligand obtained from the QM log files
    in a text file, with atom indices offset by the number of host atoms.

    Reads the charge, bond and angle parameter files of the guest and
    writes ``self.guest_qm_params_file`` with three sections (bond,
    angle and charge parameters), each delimited by a
    "Begin writing ..." / "Finish writing ..." line. Each guest atom
    index is its PDB atom number minus one plus the number of host atoms.
    """
    # Charges from QM files
    df_charges = pd.read_csv(
        self.charge_parameter_file_guest, header=None, delimiter=r"\s+"
    )
    df_charges.columns = ["atom", "charges"]
    qm_charges = [
        round(charge, 6) for charge in df_charges["charges"].values.tolist()
    ]

    # Guest atom indices; the PDB and the host atom count are the same for
    # the bond and angle sections, so they are loaded only once.
    ppdb = PandasPdb()
    ppdb.read_pdb(self.guest_qm_pdb)
    no_host_atoms = get_num_host_atoms(self.host_pdb)
    atom_name_list = [
        atom_number - 1 + no_host_atoms
        for atom_number in ppdb.df["ATOM"]["atom_number"].values.tolist()
    ]

    # Bond Parameters from QM files
    df_bonds = pd.read_csv(
        self.bond_parameter_file_guest, header=None, delimiter=r"\s+"
    )
    df_bonds.columns = ["bond", "k_bond", "bond_length", "bond_1", "bond_2"]
    # Hoisted: the smallest guest index does not change per bond/angle.
    first_guest_index = min(atom_name_list)
    bond_1_list = [
        x - 1 + first_guest_index for x in df_bonds["bond_1"].values.tolist()
    ]
    bond_2_list = [
        x - 1 + first_guest_index for x in df_bonds["bond_2"].values.tolist()
    ]
    # kcal/mol * A^2 to kJ/mol * nm^2
    k_bond_list = [
        round(k * KCAL_MOL_PER_KJ_MOL * ANGSTROMS_PER_NM**2, 10)
        for k in df_bonds["k_bond"].values.tolist()
    ]
    # presumably Angstrom to nm - confirm against the bond parameter file
    bond_length_list = [
        round(length / 10.00, 6)
        for length in df_bonds["bond_length"].values.tolist()
    ]

    # Angle Parameters from QM files
    df_angles = pd.read_csv(
        self.angle_parameter_file_guest, header=None, delimiter=r"\s+"
    )
    df_angles.columns = [
        "angle",
        "k_angle",
        "angle_degrees",
        "angle_1",
        "angle_2",
        "angle_3",
    ]
    angle_1_list = [
        x - 1 + first_guest_index for x in df_angles["angle_1"].values.tolist()
    ]
    angle_2_list = [
        x - 1 + first_guest_index for x in df_angles["angle_2"].values.tolist()
    ]
    angle_3_list = [
        x - 1 + first_guest_index for x in df_angles["angle_3"].values.tolist()
    ]
    # kcal/mol * radian^2 to kJ/mol * radian^2
    k_angle_list = [
        round(k * KCAL_MOL_PER_KJ_MOL, 6)
        for k in df_angles["k_angle"].values.tolist()
    ]
    angle_list = [
        round(angle * RADIANS_PER_DEGREE, 6)
        for angle in df_angles["angle_degrees"].values.tolist()
    ]

    bond_template = ' <Bond d="{}" k="{}" p1="{}" p2="{}"/>\n'
    angle_template = ' <Angle a="{}" k="{}" p1="{}" p2="{}" p3="{}"/>\n'
    particle_template = '<Particle q="{}" eps="{}" sig="{}" atom="{}"/>\n'

    # All lines are built before the file is opened, so a failure while
    # formatting cannot leave a partially written file behind.
    output_lines = ["Begin writing the Bond Parameters" + "\n"]
    output_lines.extend(
        bond_template.format(d, k, p1, p2)
        for d, k, p1, p2 in zip(
            bond_length_list, k_bond_list, bond_1_list, bond_2_list
        )
    )
    output_lines.append("Finish writing the Bond Parameters" + "\n")
    output_lines.append("Begin writing the Angle Parameters" + "\n")
    output_lines.extend(
        angle_template.format(a, k, p1, p2, p3)
        for a, k, p1, p2, p3 in zip(
            angle_list, k_angle_list, angle_1_list, angle_2_list, angle_3_list
        )
    )
    output_lines.append("Finish writing the Angle Parameters" + "\n")
    output_lines.append("Begin writing the Charge Parameters" + "\n")
    # Indexing (not zip) keeps the IndexError when there are more charges
    # than guest atoms instead of silently dropping charges.
    output_lines.extend(
        particle_template.format(charge, 0.00, 0.00, atom_name_list[i])
        for i, charge in enumerate(qm_charges)
    )
    output_lines.append("Finish writing the Charge Parameters" + "\n")

    with open(self.guest_qm_params_file, "w") as xml:
        xml.writelines(output_lines)
[docs] def write_reparameterised_system_xml(self):
"""
Writes a reparameterised XML force field file for the system.
"""
# Bond Parameters
with open(self.guest_qm_params_file, "r") as f_params:
lines_params = f_params.readlines()
# Bond Parameters
for i in range(len(lines_params)):
if "Begin writing the Bond Parameters" in lines_params[i]:
to_begin = int(i)
if "Finish writing the Bond Parameters" in lines_params[i]:
to_end = int(i)
bond_params = lines_params[to_begin + 1 : to_end]
index_search_replace_bond = []
# TODO: again, use string formatting.
for i in bond_params:
bond_line_to_replace = i
# print(bond_line_to_replace)
atom_number_list = [
re.findall("\d*\.?\d+", i)[3],
re.findall("\d*\.?\d+", i)[5],
]
# print(atom_number_list)
comb_1 = (
"p1="
+ '"'
+ atom_number_list[0]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[1]
+ '"'
+ "/>"
)
comb_2 = (
"p1="
+ '"'
+ atom_number_list[1]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[0]
+ '"'
+ "/>"
)
comb_list_bond = [comb_1, comb_2]
# print(comb_list_bond)
list_search_bond = [
search_in_file(file=self.system_xml, word=comb_1),
search_in_file(file=self.system_xml, word=comb_2),
]
# print(list_search_bond)
for j in range(len(list_search_bond)):
if list_search_bond[j] != []:
to_add = (list_search_bond[j], i)
# print(to_add)
index_search_replace_bond.append(to_add)
# Angle Parameters
for i in range(len(lines_params)):
if "Begin writing the Angle Parameters" in lines_params[i]:
to_begin = int(i)
if "Finish writing the Angle Parameters" in lines_params[i]:
to_end = int(i)
angle_params = lines_params[to_begin + 1 : to_end]
index_search_replace_angle = []
for i in angle_params:
angle_line_to_replace = i
# print(angle_line_to_replace)
index_search_replace_angle = []
# TODO: use string formatting (generalize to function?)
for i in angle_params:
angle_line_to_replace = i
# print(angle_line_to_replace)
atom_number_list = [
re.findall("\d*\.?\d+", i)[3],
re.findall("\d*\.?\d+", i)[5],
re.findall("\d*\.?\d+", i)[7],
]
# print(atom_number_list)
comb_1 = (
"p1="
+ '"'
+ atom_number_list[0]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[1]
+ '"'
+ " "
+ "p3="
+ '"'
+ atom_number_list[2]
+ '"'
+ "/>"
)
comb_2 = (
"p1="
+ '"'
+ atom_number_list[0]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[2]
+ '"'
+ " "
+ "p3="
+ '"'
+ atom_number_list[1]
+ '"'
+ "/>"
)
comb_3 = (
"p1="
+ '"'
+ atom_number_list[1]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[0]
+ '"'
+ " "
+ "p3="
+ '"'
+ atom_number_list[2]
+ '"'
+ "/>"
)
comb_4 = (
"p1="
+ '"'
+ atom_number_list[1]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[2]
+ '"'
+ " "
+ "p3="
+ '"'
+ atom_number_list[0]
+ '"'
+ "/>"
)
comb_5 = (
"p1="
+ '"'
+ atom_number_list[2]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[0]
+ '"'
+ " "
+ "p3="
+ '"'
+ atom_number_list[1]
+ '"'
+ "/>"
)
comb_6 = (
"p1="
+ '"'
+ atom_number_list[2]
+ '"'
+ " "
+ "p2="
+ '"'
+ atom_number_list[1]
+ '"'
+ " "
+ "p3="
+ '"'
+ atom_number_list[0]
+ '"'
+ "/>"
)
comb_list_angle = [comb_1, comb_2, comb_3, comb_4, comb_5, comb_6]
# print(comb_list_angle)
list_search_angle = [
search_in_file(file=self.system_xml, word=comb_1),
search_in_file(file=self.system_xml, word=comb_2),
search_in_file(file=self.system_xml, word=comb_3),
search_in_file(file=self.system_xml, word=comb_4),
search_in_file(file=self.system_xml, word=comb_5),
search_in_file(file=self.system_xml, word=comb_6),
]
# print(list_search_angle)
for j in range(len(list_search_angle)):
if list_search_angle[j] != []:
to_add = (list_search_angle[j], i)
# print(to_add)
index_search_replace_angle.append(to_add)
f_org = open(self.system_xml)
lines = f_org.readlines()
for i in range(len(index_search_replace_bond)):
line_number = index_search_replace_bond[i][0][0][0] - 1
line_to_replace = index_search_replace_bond[i][0][0][1]
line_to_replace_with = index_search_replace_bond[i][1]
lines[line_number] = line_to_replace_with
for i in range(len(index_search_replace_angle)):
line_number = index_search_replace_angle[i][0][0][0] - 1
line_to_replace = index_search_replace_angle[i][0][0][1]
line_to_replace_with = index_search_replace_angle[i][1]
lines[line_number] = line_to_replace_with
f_cop = open(self.reparameterised_intermediate_system_xml_file, "w")
for i in lines:
f_cop.write(i)
f_cop.close()
f_params = open(self.guest_qm_params_file)
lines_params = f_params.readlines()
# Charge Parameters
for i in range(len(lines_params)):
if "Begin writing the Charge Parameters" in lines_params[i]:
to_begin = int(i)
if "Finish writing the Charge Parameters" in lines_params[i]:
to_end = int(i)
charge_params = lines_params[to_begin + 1 : to_end]
non_bonded_index = []
for k in charge_params:
non_bonded_index.append(int(re.findall("[-+]?\d*\.\d+|\d+", k)[3]))
charge_for_index = []
for k in charge_params:
charge_for_index.append(
float(re.findall("[-+]?\d*\.\d+|\d+", k)[0])
)
xml_off = open(self.system_xml)
xml_off_lines = xml_off.readlines()
for i in range(len(xml_off_lines)):
if "<GlobalParameters/>" in xml_off_lines[i]:
to_begin = int(i)
if "<Exceptions>" in xml_off_lines[i]:
to_end = int(i)
nonbond_params = xml_off_lines[to_begin + 4 : to_end - 1]
# print(len(nonbond_params))
f_non_bonded = open(self.system_xml_non_bonded_file, "w")
for x in nonbond_params:
f_non_bonded.write(x)
f_non_bonded = open(self.system_xml_non_bonded_file)
lines_non_bonded = f_non_bonded.readlines()
# print(len(lines_non_bonded))
lines_non_bonded_to_write = []
for i in range(len(non_bonded_index)):
line_ = lines_non_bonded[non_bonded_index[i]]
# print(line_)
eps = float(re.findall("[-+]?\d*\.\d+|\d+", line_)[0])
sig = float(re.findall("[-+]?\d*\.\d+|\d+", line_)[2])
line_to_replace = (
" "
+ "<Particle "
+ "eps="
+ '"'
+ str(eps)
+ '"'
+ " "
+ "q="
+ '"'
+ str(charge_for_index[i])
+ '"'
+ " "
+ "sig="
+ '"'
+ str(sig)
+ '"'
+ "/>"
)
lines_non_bonded_to_write.append(line_to_replace)
data_ = list(zip(non_bonded_index, lines_non_bonded_to_write))
df_non_bonded_params = pd.DataFrame(
data_, columns=["line_index", "line"]
)
# print(df_non_bonded_params.head())
f_non_bonded_ = open(self.system_xml_non_bonded_file)
lines_non_bonded_ = f_non_bonded_.readlines()
for i in range(len(lines_non_bonded_)):
if i in non_bonded_index:
lines_non_bonded_[i] = (
df_non_bonded_params.loc[
df_non_bonded_params.line_index == i, "line"
].values[0]
) + "\n"
# print(len(lines_non_bonded_))
f_write_non_bonded_reparams = open(
self.system_xml_non_bonded_reparams_file, "w"
)
for p in range(len(lines_non_bonded_)):
f_write_non_bonded_reparams.write(lines_non_bonded_[p])
f_write_non_bonded_reparams.close()
f_ = open(self.system_xml_non_bonded_reparams_file)
lines_ = f_.readlines()
print(len(lines_) == len(lines_non_bonded))
xml_off = open(self.reparameterised_intermediate_system_xml_file)
xml_off_lines = xml_off.readlines()
for i in range(len(xml_off_lines)):
if "<GlobalParameters/>" in xml_off_lines[i]:
to_begin = int(i)
if "<Exceptions>" in xml_off_lines[i]:
to_end = int(i)
lines_before_params = xml_off_lines[: to_begin + 4]
f__ = open(self.system_xml_non_bonded_reparams_file)
lines_params_non_bonded = f__.readlines()
lines_after_params = xml_off_lines[to_end - 1 :]
f_reparams_xml = open(self.reparameterised_system_xml_file, "w")
for x in lines_before_params:
f_reparams_xml.write(x)
for x in lines_params_non_bonded:
f_reparams_xml.write(x)
for x in lines_after_params:
f_reparams_xml.write(x)
f_reparams_xml.close()
def write_torsional_reparams(self):
    """
    Generates a XML force field file for the system with reparameterized
    torsional parameters of the ligand.

    The torsion lines in ``self.reparameterized_torsional_params_file``
    have their ``p1``..``p4`` indices shifted by the value returned by
    ``get_num_host_atoms(self.host_pdb)``. Every shifted line that does
    not contain ``k="0.0"`` then replaces each line of
    ``self.reparameterised_system_xml_file`` that contains the same
    ``p1 p2 p3 p4 periodicity`` attribute run. The result is written to
    ``self.reparameterised_torsional_system_xml_file``.

    Raises
    ------
    ValueError
        If a non-blank torsion line without ``k="0.0"`` lacks one of the
        ``p1``..``p4`` or ``periodicity`` attributes.
    """
    no_host_atoms = get_num_host_atoms(self.host_pdb)

    # Attributes are looked up by name rather than by their position among
    # all numbers on the line, so a force constant written in exponent
    # notation (e.g. k="1e-05") cannot shift the indices that are read.
    atom_index_attr = re.compile(r'\b(p[1-4])="(\d+)"')
    key_attr = re.compile(r'\b(p[1-4]|periodicity)="(\d+)"')
    key_names = ("p1", "p2", "p3", "p4", "periodicity")

    def shift_atom_index(match):
        # Rewrite one pN="value" attribute with the host-atom offset added.
        shifted = int(int(match.group(2)) + no_host_atoms)
        return '{}="{}"'.format(match.group(1), shifted)

    with open(self.reparameterized_torsional_params_file, "r") as xml_tor:
        xml_tor_lines_renum = [
            atom_index_attr.sub(shift_atom_index, line) for line in xml_tor
        ]

    # Build (search key, replacement line) pairs once, outside the scan of
    # the system XML.
    zero_k = 'k="0.0"'
    replacements = []
    for line in xml_tor_lines_renum:
        if zero_k in line or not line.strip():
            continue
        attrs = dict(key_attr.findall(line))
        missing = [name for name in key_names if name not in attrs]
        if missing:
            raise ValueError(
                "Torsion line is missing attribute(s) {}: {!r}".format(
                    ", ".join(missing), line
                )
            )
        key = " ".join(
            '{}="{}"'.format(name, int(attrs[name])) for name in key_names
        )
        # A final line without a newline would otherwise fuse two XML
        # lines together once it is substituted into the system file.
        if not line.endswith("\n"):
            line += "\n"
        replacements.append((key, line))

    with open(self.reparameterised_system_xml_file, "r") as xml_system:
        system_lines = xml_system.readlines()

    for j, system_line in enumerate(system_lines):
        # Every search key contains this attribute, so other lines can
        # never match and are skipped cheaply.
        if 'periodicity="' not in system_line:
            continue
        # All pairs are tried in order against the current content of the
        # line, so when several torsions share a key the last one wins.
        for key, new_line in replacements:
            if key in system_lines[j]:
                print(system_lines[j])
                system_lines[j] = new_line

    with open(self.reparameterised_torsional_system_xml_file, "w") as f:
        f.writelines(system_lines)
def save_amber_params_non_qm_charges(self):
    """
    Saves amber generated topology files for the system
    without the QM charges.

    Two XML systems are processed in turn: first
    ``self.non_reparameterised_system_xml_file`` and then
    ``self.reparameterised_intermediate_torsional_system_xml_file``.
    For each one the prmtop and inpcrd files are saved, reloaded, and a
    table comparing the energy decomposition of the XML system with that
    of the reloaded prmtop system is printed.

    Raises
    ------
    ValueError
        If ``self.load_topology`` is neither ``"parmed"`` nor
        ``"openmm"``.
    """
    energy_terms = [
        "HarmonicBondForce",
        "HarmonicAngleForce",
        "PeriodicTorsionForce",
        "NonbondedForce",
    ]

    def load_system_topology():
        # Read the topology of the system PDB with the selected backend.
        if self.load_topology == "parmed":
            return parmed.load_file(
                self.system_pdb, structure=True
            ).topology
        if self.load_topology == "openmm":
            return simtk.openmm.app.PDBFile(self.system_pdb).topology
        # Previously this case fell through and surfaced later as an
        # unbound local variable.
        raise ValueError(
            "load_topology must be 'parmed' or 'openmm', got {!r}".format(
                self.load_topology
            )
        )

    def energy_frame(decomposition, column):
        # Tabulate the four energy terms of one decomposition, indexed by
        # term name. The elements are flattened into a single list and
        # handed to list_to_dict once; the mapping is then queried per
        # term (a term that is absent yields None).
        flat = [item for elem in decomposition for item in list(elem)]
        energies = list_to_dict(flat)
        frame = pd.DataFrame(
            [(term, energies.get(term)) for term in energy_terms],
            columns=["Energy_term", column],
        )
        return frame.set_index("Energy_term")

    def save_and_compare(
        xml_file, prmtop_file, inpcrd_file, xml_column, prmtop_column
    ):
        # Save prmtop/inpcrd for one XML system and print the XML versus
        # prmtop energy comparison.
        openmm_system = parmed.openmm.load_topology(
            load_system_topology(), parmed.load_file(xml_file)
        )
        openmm_system.save(prmtop_file, overwrite=True)
        # Coordinates are attached only after the prmtop has been saved
        # and before the inpcrd is written.
        openmm_system.coordinates = parmed.load_file(
            self.system_pdb, structure=True
        ).coordinates
        openmm_system.save(inpcrd_file, overwrite=True)
        parm = parmed.load_file(prmtop_file, inpcrd_file)
        df_energy_xml = energy_frame(
            parmed.openmm.energy_decomposition_system(
                openmm_system, parmed.load_file(xml_file)
            ),
            xml_column,
        )
        df_energy_prmtop = energy_frame(
            parmed.openmm.energy_decomposition_system(
                parm, parm.createSystem()
            ),
            prmtop_column,
        )
        print(pd.concat([df_energy_xml, df_energy_prmtop], axis=1))

    save_and_compare(
        self.non_reparameterised_system_xml_file,
        self.prmtop_system_non_params,
        self.inpcrd_system_non_params,
        "Energy_xml_non_params",
        "Energy_prmtop_non_params",
    )
    save_and_compare(
        self.reparameterised_intermediate_torsional_system_xml_file,
        self.prmtop_system_intermediate_params,
        self.inpcrd_system_intermediate_params,
        "Energy_xml_params",
        "Energy_prmtop_params",
    )
def save_amber_params(self):
    """
    Saves amber generated topology files for the system.

    Two XML systems are processed in turn: first
    ``self.non_reparameterised_system_xml_file`` and then
    ``self.reparameterised_torsional_system_xml_file``. For each one the
    prmtop and inpcrd files are saved, reloaded, and a table comparing
    the energy decomposition of the XML system with that of the reloaded
    prmtop system is printed.

    Raises
    ------
    ValueError
        If ``self.load_topology`` is neither ``"parmed"`` nor
        ``"openmm"``.
    """
    energy_terms = [
        "HarmonicBondForce",
        "HarmonicAngleForce",
        "PeriodicTorsionForce",
        "NonbondedForce",
    ]

    def load_system_topology():
        # Read the topology of the system PDB with the selected backend.
        if self.load_topology == "parmed":
            return parmed.load_file(
                self.system_pdb, structure=True
            ).topology
        if self.load_topology == "openmm":
            return simtk.openmm.app.PDBFile(self.system_pdb).topology
        # Previously this case fell through and surfaced later as an
        # unbound local variable.
        raise ValueError(
            "load_topology must be 'parmed' or 'openmm', got {!r}".format(
                self.load_topology
            )
        )

    def energy_frame(decomposition, column):
        # Tabulate the four energy terms of one decomposition, indexed by
        # term name. The elements are flattened into a single list and
        # handed to list_to_dict once; the mapping is then queried per
        # term (a term that is absent yields None).
        flat = [item for elem in decomposition for item in list(elem)]
        energies = list_to_dict(flat)
        frame = pd.DataFrame(
            [(term, energies.get(term)) for term in energy_terms],
            columns=["Energy_term", column],
        )
        return frame.set_index("Energy_term")

    def save_and_compare(
        xml_file, prmtop_file, inpcrd_file, xml_column, prmtop_column
    ):
        # Save prmtop/inpcrd for one XML system and print the XML versus
        # prmtop energy comparison.
        openmm_system = parmed.openmm.load_topology(
            load_system_topology(), parmed.load_file(xml_file)
        )
        openmm_system.save(prmtop_file, overwrite=True)
        # Coordinates are attached only after the prmtop has been saved
        # and before the inpcrd is written.
        openmm_system.coordinates = parmed.load_file(
            self.system_pdb, structure=True
        ).coordinates
        openmm_system.save(inpcrd_file, overwrite=True)
        parm = parmed.load_file(prmtop_file, inpcrd_file)
        df_energy_xml = energy_frame(
            parmed.openmm.energy_decomposition_system(
                openmm_system, parmed.load_file(xml_file)
            ),
            xml_column,
        )
        df_energy_prmtop = energy_frame(
            parmed.openmm.energy_decomposition_system(
                parm, parm.createSystem()
            ),
            prmtop_column,
        )
        print(pd.concat([df_energy_xml, df_energy_prmtop], axis=1))

    save_and_compare(
        self.non_reparameterised_system_xml_file,
        self.prmtop_system_non_params,
        self.inpcrd_system_non_params,
        "Energy_xml_non_params",
        "Energy_prmtop_non_params",
    )
    save_and_compare(
        self.reparameterised_torsional_system_xml_file,
        self.prmtop_system_params,
        self.inpcrd_system_params,
        "Energy_xml_params",
        "Energy_prmtop_params",
    )