Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 26 additions & 21 deletions surface_analyses/commandline_electrostatic.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def parse_args(argv=None):
add_trajectory_options_to_parser(parser)
parser.add_argument('--dx', type=str, default=None, nargs='?', help="Optional dx file with the electrostatic potential. If this is omitted, you must specify --apbs_dir")
parser.add_argument('--apbs_dir', help="Directory in which intermediate files are stored when running APBS. Will be created if it does not exist.", type=str, default=None)
parser.add_argument('--probe_radius', type=float, help='probe radius in Angstrom', default=1.4)
parser.add_argument('--probe_radius', type=float, help='Probe radius in nm', default=0.14)
parser.add_argument('-o', '--out', default=None, type=str, help='Output csv file.')
parser.add_argument(
'-c', '--patch_cutoff',
Expand Down Expand Up @@ -146,7 +146,7 @@ def run_electrostatics(
traj: md.Trajectory,
dx: str = None,
apbs_dir: str = None,
probe_radius: float = 1.4,
probe_radius: float = 0.14,
out: str = None,
patch_cutoff: tuple = (2., -2.),
integral_cutoff: tuple = (0.3, -0.3),
Expand Down Expand Up @@ -191,31 +191,23 @@ def run_electrostatics(

if dx is not None:
griddata = load_dx(dx, colname='DX')
griddata.struct = traj[0]
else:
griddata = get_apbs_potential_from_mdtraj(traj, apbs_dir, pH, ion_species)

# *10 because mdtraj calculates stuff in nanometers instead of Angstrom.
radii = 10. * np.array([atom.element.radius for atom in traj.top.atoms])
grid = griddata.grid.scale(0.1)
columns = ['DX']

print('Run info:')
pprint.pprint({
'#Atoms': traj.n_atoms,
'Grid dimensions': griddata.grid.shape,
'Grid dimensions': grid.shape,
# **kwargs,
})

print('Calculating triangulated SASA')
radii = np.array([atom.element.radius for atom in traj.top.atoms])
coord = traj.xyz[0]

if surface_type == 'sas':
surf = compute_sas(griddata.grid, griddata.coord, radii, probe_radius)
elif surface_type == 'gauss':
surf = compute_gauss_surf(griddata.grid, griddata.coord, radii, gauss_shift, gauss_scale)
elif surface_type == 'ses':
surf = compute_ses(griddata.grid, griddata.coord, radii, probe_radius)
else:
raise ValueError("Unknown surface type: " + str(surface_type))
print('Calculating triangulated SASA')
surf = calculate_surface(surface_type, grid, coord, radii, probe_radius, gauss_shift, gauss_scale)

if check_cdrs:
try:
Expand All @@ -234,7 +226,7 @@ def run_electrostatics(
cdrs = []
residues = np.array([str(a.residue) for a in traj.top.atoms])

pdbtree = cKDTree(traj.xyz[0] * 10.)
pdbtree = cKDTree(coord)
_, closest_atom = pdbtree.query(surf.vertices)

# Calculate the area of each triangle, and split evenly among the vertices.
Expand All @@ -245,7 +237,8 @@ def run_electrostatics(

# The patch searching
print('Finding patches')
values = griddata.interpolate(columns, surf.vertices)[columns[0]]
# the griddata object is in Angstroms
values = griddata.interpolate(columns, surf.vertices * 10)[columns[0]]
patches = pd.DataFrame({
'positive': assign_patches(surf.faces, values > patch_cutoff[0]),
'negative': assign_patches(surf.faces, values < patch_cutoff[1]),
Expand Down Expand Up @@ -286,10 +279,10 @@ def run_electrostatics(
write_patches(patches, output, 'negative')

# Compute the total solvent-accessible potential.
within_range, closest_atom, distance = griddata.distance_to_spheres(rmax=10, atomic_radii=radii)
within_range, closest_atom, distance = grid.distance_to_spheres(centers=traj.xyz[0], rmax=1., radii=radii)
not_protein = distance > probe_radius
accessible = within_range[not_protein]
voxel_volume = griddata.grid.voxel_volume
voxel_volume = grid.voxel_volume
accessible_data = griddata[columns[0]].values[accessible]
integral = np.sum(accessible_data) * voxel_volume
integral_high = np.sum(np.maximum(accessible_data - integral_cutoff[0], 0)) * voxel_volume
Expand Down Expand Up @@ -334,6 +327,19 @@ def run_electrostatics(
}


def calculate_surface(surf_type, grid, coord, radii, probe_radius, gauss_shift, gauss_scale) -> Surface:
"""Calculate a surface. Note: all inputs should be in nm."""
if surf_type == 'sas':
surf = compute_sas(grid, coord, radii, probe_radius)
elif surf_type == 'gauss':
surf = compute_gauss_surf(grid, coord, radii, gauss_shift, gauss_scale)
elif surf_type == 'ses':
surf = compute_ses(grid, coord, radii, probe_radius)
else:
raise ValueError("Unknown surface type: " + str(surf_type))
return surf


IonSpecies = namedtuple("IonSpecies", "charge concentration radius")

DEFAULT_ION_SPECIES = [IonSpecies(1.0, 0.1, 2.0), IonSpecies(-1.0, 0.1, 2.0)]
Expand Down Expand Up @@ -380,7 +386,6 @@ def get_apbs_potential_from_mdtraj(traj, apbs_dir, pH, ion_species):
else:
raise ValueError("Neither apbs.pqr-PE0.dx nor apbs.pqr.dx were found in the apbs directory.")
griddata = load_dx(dxfile, colname='DX')
griddata.struct = traj[0]
return griddata

def write_patches(df, out, column):
Expand Down
2 changes: 1 addition & 1 deletion surface_analyses/hydrophobic_potential.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from skimage.measure import marching_cubes
import numpy as np

from .surface import Surface, gaussian_grid, compute_ses
from .surface import gaussian_grid, compute_ses

# Todo: don't use a cubic grid for hydrophobic_potential. Rectangular is more
# efficient.
Expand Down
24 changes: 17 additions & 7 deletions surface_analyses/surface.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __init__(self, vertices, faces):
Parameters
----------
vertices: np.ndarray, shape=(n_verts, 3)
vertices in Angstrom. Will be converted to nm, when writing a ply file.
vertices in nm.
faces: np.ndarray, shape=(n_verts, 3)
Triangles defined as 3 integer indices for vertices.
"""
Expand Down Expand Up @@ -89,11 +89,11 @@ def vertex_areas(self):
def __repr__(self):
return f"{self.__class__.__name__}({self.n_vertices} vertices, {self.n_faces} faces)"

def as_plydata(self, text=True, units_per_angstrom=0.01):
def as_plydata(self, text=True, units_per_angstrom=0.1):
"""Convert to a plyfile.PlyData object, while scaling coordinates

the default scaling units_per_angstrom=0.01 matches the PyMol CGO
scaling factor of 1:100.
the default scaling units_per_angstrom=0.1 matches the PyMol CGO
scaling factor of 1:100 and the nm -> A conversion.
"""
vertex_dtype = [("x", "f4"), ("y", "f4"), ("z", "f4")]
keys = list(self.data.keys())
Expand Down Expand Up @@ -121,8 +121,9 @@ def as_plydata(self, text=True, units_per_angstrom=0.01):
return out

def write_ply(self, fname, coordinate_scaling=1.):
"Write a PLY file, scaling is relative to the default of 0.1."
with open(fname, mode="wb") as f:
self.as_plydata(units_per_angstrom=coordinate_scaling*0.01).write(f)
self.as_plydata(units_per_angstrom=coordinate_scaling*0.1).write(f)

@classmethod
def from_plydata(cls, plydata):
Expand Down Expand Up @@ -268,6 +269,11 @@ def compute_sas(grid, xyz, radii, solvent_radius=0.14):


def compute_sas_msms(centers, radii, solvent_radius):
"""Compute a solvent-accessible surface area in msms.

SAS is not implemented in msms, but can be approximated using a very small
solvent radius. The inputs should be in nanometer (nm) units!
"""
shifted_radii = np.array(radii) + solvent_radius
tiny = 0.01
return compute_ses_msms(centers, shifted_radii, solvent_radius=tiny)
Expand Down Expand Up @@ -342,7 +348,11 @@ def compute_ses_gisttools(grid, xyz, radii, solvent_radius=0.14):


def compute_ses_msms(xyz, radii, solvent_radius=0.14, density=3.0):
"""Compute a SES using MSMS."""
"""Compute a SES using MSMS.

All inputs should be in nanometer (nm) units, except for the density, which
is passed to msms as-is.
"""
# conversion nm -> Angstrom
msms_out = msms.run_msms(xyz*10, radii*10, probe_radius=solvent_radius*10, density=density)
vertices = msms_out.get_vertex_positions() / 10
Expand Down Expand Up @@ -376,7 +386,7 @@ def gaussian_grid(grid, xyz, sigma):


def gaussian_grid_variable_sigma(grid, xyz, sigma, rmax=None):
"""sum(exp(-dist(x, x_i)**2)/(2*sigma_i**2)) at each grid point x, where
"""sum(exp((-dist(x, x_i)**2)/(2*sigma_i**2))) at each grid point x, where
the sum is over every x_i out of xyz.

Parameters
Expand Down
14 changes: 7 additions & 7 deletions test/commandline_electrostatic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pathlib import Path
import os
import shutil
from unittest.mock import patch

import pandas as pd
from pandas.testing import assert_frame_equal
Expand Down Expand Up @@ -65,11 +66,11 @@ def with_or_without_cdrs(request):
def test_trastuzumab_sas_integrals(with_or_without_cdrs):
expected = np.array(
[
25015.40424103,
12573.01033872,
29718.71768997,
-4703.31344894,
-1867.65861091,
25.01540424103,
12.57301033872,
29.71871768997,
-4.70331344894,
-1.86765861091,
]
)
if with_or_without_cdrs == 'with':
Expand All @@ -96,7 +97,6 @@ def test_trastuzumab_sas_integrals(with_or_without_cdrs):
expected_patches = pd.read_csv(str(TRASTUZUMAB_PATH / exp_fname))
if with_or_without_cdrs == 'without':
expected_patches['cdr'] = False
print(expected_patches['cdr'].sum(), patches['cdr'].sum())
assert_frame_equal(patches, expected_patches)


Expand All @@ -120,7 +120,7 @@ def test_trastuzumab_ply_out():
# check the number of vertices in the output
with open(TRASTUZUMAB_PATH / 'apbs-potential.ply') as f:
if msms.msms_available():
assert get_nth_line(f, 2).strip() == "element vertex 43411"
assert get_nth_line(f, 2).strip() == "element vertex 40549"
else:
assert get_nth_line(f, 2).strip() == "element vertex 67252"

Expand Down
65 changes: 32 additions & 33 deletions test/trastuzumab/apbs-patches-msms.save
Original file line number Diff line number Diff line change
@@ -1,34 +1,33 @@
type,npoints,area,cdr,main_residue
positive,1863,431.54070189258994,True,ARG59
positive,1547,345.90682158468985,True,LYS166
positive,509,116.7485765833226,False,LYS224
positive,239,50.16736294916978,False,PRO14
positive,224,44.41874485333689,False,GLN82
positive,169,28.95821309534873,True,THR74
positive,152,27.99561352816951,True,ARG187
positive,129,23.216873083982687,True,GLU1
positive,113,20.46684395168215,False,ARG182
positive,90,17.829391566192037,False,GLY189
positive,101,13.83507011562028,True,TYR170
positive,29,4.595708916427887,False,THR195
positive,10,2.6442337743944853,False,SER85
positive,14,2.3280992398040787,False,THR69
positive,23,2.097231570286438,True,PHE27
positive,14,2.04419205063933,False,SER198
positive,11,1.986901989790291,False,ARG87
positive,17,1.285559272976008,False,SER197
positive,9,1.1505531487664118,True,TRP47
positive,3,0.38534614109958326,True,ARG145
positive,6,0.37090140750726835,False,ARG182
positive,7,0.04858950236157684,False,LYS166
negative,259,65.43616275570122,True,ASP31
negative,302,64.7879934150777,False,GLU89
negative,32,7.416446479527371,False,ASP138
negative,28,4.260605361192842,False,GLU226
negative,23,2.6213427555188056,True,THR32
negative,19,2.212451793625336,True,LEU175
negative,21,2.0886264474594762,False,GLN13
negative,8,1.0222647085405423,True,GLY100
negative,3,0.6006055508090062,False,SER188
negative,3,0.3172385767901459,True,GLY101
negative,9,0.2869369854197508,False,GLU46
positive,1725,4.182278508729034,True,ARG59
positive,1345,3.3018772409740245,True,LYS166
positive,474,1.1227447014770573,False,LYS224
positive,212,0.4780537234772543,False,PRO14
positive,196,0.42350239444585136,False,GLN82
positive,153,0.26352678455568534,True,THR74
positive,131,0.2623972855174914,True,ARG187
positive,113,0.2239198468055985,True,GLU1
positive,91,0.18047135288680483,False,ARG182
positive,82,0.17243666156532797,False,ARG187
positive,85,0.11763809617857156,True,TYR170
positive,24,0.04135049908954891,False,THR195
positive,10,0.027526301024288966,False,SER85
positive,12,0.02167615367273435,False,SER198
positive,12,0.019511131671895172,False,THR69
positive,7,0.015586532451777404,False,ARG87
positive,18,0.01556431292971975,True,PHE27
positive,8,0.006207044981287775,True,PHE219
positive,5,0.0058994152378983485,False,SER197
positive,3,0.005203025153503235,False,ARG182
positive,6,0.00519997560517816,False,ARG182
positive,3,0.003164115750466868,True,ARG145
positive,2,0.0025863522808405913,True,LYS30
negative,241,0.6363343407004047,True,ASP31
negative,268,0.6162607965834226,False,GLU89
negative,27,0.07296024063999074,False,ASP138
negative,25,0.04096139412400949,False,GLU226
negative,23,0.022811577815085794,True,THR32
negative,17,0.016368615194651954,True,LEU175
negative,14,0.015925282524936708,False,GLN13
negative,7,0.011160988364316836,True,GLY100
negative,6,0.010733508585959266,False,ALA40
72 changes: 36 additions & 36 deletions test/trastuzumab/apbs-patches.save
Original file line number Diff line number Diff line change
@@ -1,37 +1,37 @@
type,npoints,area,cdr,main_residue
positive,2706,405.5523245886125,True,ARG59
positive,2050,313.58175296220804,True,LYS166
positive,714,107.85564371527026,False,LYS224
positive,269,43.49029152878992,False,PRO14
positive,269,40.06015391376671,False,GLN82
positive,164,24.193549860998854,True,ARG187
positive,146,23.264461253442278,True,THR74
positive,138,21.455028005128646,True,GLU1
positive,120,17.16530442445461,False,ARG187
positive,119,17.067557218893242,False,ARG182
positive,66,10.186808113730878,True,TYR170
positive,22,3.3606860217405483,False,THR195
positive,30,3.0287956125102933,True,ALA79
positive,15,2.3222784647950907,False,SER85
positive,9,1.524505876004696,False,TYR60
positive,14,1.1946246155227223,False,SER198
positive,6,0.7587038942923148,True,PHE27
positive,3,0.633904864701132,False,ARG87
positive,2,0.4148370213806629,False,ARG139
positive,1,0.11851314455270767,True,THR218
positive,6,0.10143411136232316,False,LYS166
positive,6,0.06065049464814365,False,ILE34
positive,6,0.025959379854612052,True,LYS30
positive,6,0.00643309680162929,False,LYS166
positive,6,6.140272148513759e-06,True,TRP99
negative,379,60.80976378557092,True,ASP31
negative,375,56.702972870492886,False,GLU89
negative,44,6.2794784369907575,False,ASP138
negative,18,3.5186013849840183,False,GLU226
negative,20,1.6015525545711475,True,THR32
negative,5,0.9135120759407679,False,LEU11
negative,5,0.7823746613382052,False,ALA40
negative,3,0.4150233045220375,True,ASP108
negative,5,0.31328256490329903,True,LEU175
negative,4,0.1601275751988093,False,SER181
negative,6,0.1596132023259997,False,ALA40
positive,2706,4.0555237192411315,True,ARG59
positive,2050,3.1358180610606072,True,LYS166
positive,714,1.0785564850219775,False,LYS224
positive,269,0.4349030355542176,False,PRO14
positive,269,0.4006016732560038,False,GLN82
positive,164,0.24193543018723437,True,ARG187
positive,146,0.23264465711205656,True,THR74
positive,138,0.2145502520054379,True,GLU1
positive,120,0.1716530068706561,False,ARG187
positive,119,0.1706755639411952,False,ARG182
positive,66,0.10186810414633858,True,TYR170
positive,22,0.033606791475904174,False,THR195
positive,30,0.030287938567198577,True,ALA79
positive,15,0.023222786999516153,False,SER85
positive,9,0.015245096806514388,False,TYR60
positive,14,0.011946238389706801,False,SER198
positive,6,0.007587021401074405,True,PHE27
positive,3,0.00633904083467011,False,ARG87
positive,2,0.004148372012423352,False,ARG139
positive,1,0.001185125942962865,True,THR218
positive,6,0.0010143504841835236,False,LYS166
positive,6,0.0006065153211238794,False,ILE34
positive,6,0.00025960786297218874,True,LYS30
positive,6,6.43316927835258e-05,False,LYS166
positive,6,6.1429446418515e-08,True,TRP99
negative,379,0.6080976480519841,True,ASP31
negative,375,0.5670296154251784,False,GLU89
negative,44,0.06279480835187692,False,ASP138
negative,18,0.03518602688154715,False,GLU226
negative,20,0.016015456201785128,True,THR32
negative,5,0.00913510504324222,False,LEU11
negative,5,0.0078237506319662,False,ALA40
negative,3,0.004150241366005503,True,ASP108
negative,5,0.003132833357085474,True,LEU175
negative,4,0.0016012810156098565,False,SER181
negative,6,0.001596131180122029,False,ALA40
Loading