diff --git a/surface_analyses/commandline_electrostatic.py b/surface_analyses/commandline_electrostatic.py index da5a3ae..a0f6b0a 100644 --- a/surface_analyses/commandline_electrostatic.py +++ b/surface_analyses/commandline_electrostatic.py @@ -53,7 +53,7 @@ def parse_args(argv=None): add_trajectory_options_to_parser(parser) parser.add_argument('--dx', type=str, default=None, nargs='?', help="Optional dx file with the electrostatic potential. If this is omitted, you must specify --apbs_dir") parser.add_argument('--apbs_dir', help="Directory in which intermediate files are stored when running APBS. Will be created if it does not exist.", type=str, default=None) - parser.add_argument('--probe_radius', type=float, help='probe radius in Angstrom', default=1.4) + parser.add_argument('--probe_radius', type=float, help='Probe radius in nm', default=0.14) parser.add_argument('-o', '--out', default=None, type=str, help='Output csv file.') parser.add_argument( '-c', '--patch_cutoff', @@ -146,7 +146,7 @@ def run_electrostatics( traj: md.Trajectory, dx: str = None, apbs_dir: str = None, - probe_radius: float = 1.4, + probe_radius: float = 0.14, out: str = None, patch_cutoff: tuple = (2., -2.), integral_cutoff: tuple = (0.3, -0.3), @@ -191,31 +191,23 @@ def run_electrostatics( if dx is not None: griddata = load_dx(dx, colname='DX') - griddata.struct = traj[0] else: griddata = get_apbs_potential_from_mdtraj(traj, apbs_dir, pH, ion_species) - - # *10 because mdtraj calculates stuff in nanometers instead of Angstrom. - radii = 10. * np.array([atom.element.radius for atom in traj.top.atoms]) + grid = griddata.grid.scale(0.1) columns = ['DX'] print('Run info:') pprint.pprint({ '#Atoms': traj.n_atoms, - 'Grid dimensions': griddata.grid.shape, + 'Grid dimensions': grid.shape, # **kwargs, }) - print('Calculating triangulated SASA') + radii = np.array([atom.element.radius for atom in traj.top.atoms]) + coord = traj.xyz[0] - if surface_type == 'sas': - surf = compute_sas(griddata.grid, griddata.coord, radii, probe_radius) - elif surface_type == 'gauss': - surf = compute_gauss_surf(griddata.grid, griddata.coord, radii, gauss_shift, gauss_scale) - elif surface_type == 'ses': - surf = compute_ses(griddata.grid, griddata.coord, radii, probe_radius) - else: - raise ValueError("Unknown surface type: " + str(surface_type)) + print('Calculating triangulated SASA') + surf = calculate_surface(surface_type, grid, coord, radii, probe_radius, gauss_shift, gauss_scale) if check_cdrs: try: @@ -234,7 +226,7 @@ def run_electrostatics( cdrs = [] residues = np.array([str(a.residue) for a in traj.top.atoms]) - pdbtree = cKDTree(traj.xyz[0] * 10.) + pdbtree = cKDTree(coord) _, closest_atom = pdbtree.query(surf.vertices) # Calculate the area of each triangle, and split evenly among the vertices. @@ -245,7 +237,8 @@ def run_electrostatics( # The patch searching print('Finding patches') - values = griddata.interpolate(columns, surf.vertices)[columns[0]] + # the griddata object is in Angstroms + values = griddata.interpolate(columns, surf.vertices * 10)[columns[0]] patches = pd.DataFrame({ 'positive': assign_patches(surf.faces, values > patch_cutoff[0]), 'negative': assign_patches(surf.faces, values < patch_cutoff[1]), @@ -286,10 +279,10 @@ def run_electrostatics( write_patches(patches, output, 'negative') # Compute the total solvent-accessible potential. - within_range, closest_atom, distance = griddata.distance_to_spheres(rmax=10, atomic_radii=radii) + within_range, closest_atom, distance = grid.distance_to_spheres(centers=traj.xyz[0], rmax=1., radii=radii) not_protein = distance > probe_radius accessible = within_range[not_protein] - voxel_volume = griddata.grid.voxel_volume + voxel_volume = grid.voxel_volume accessible_data = griddata[columns[0]].values[accessible] integral = np.sum(accessible_data) * voxel_volume integral_high = np.sum(np.maximum(accessible_data - integral_cutoff[0], 0)) * voxel_volume @@ -334,6 +327,19 @@ def run_electrostatics( } +def calculate_surface(surf_type, grid, coord, radii, probe_radius, gauss_shift, gauss_scale) -> Surface: + """Calculate a surface. Note: all inputs should be in nm.""" + if surf_type == 'sas': + surf = compute_sas(grid, coord, radii, probe_radius) + elif surf_type == 'gauss': + surf = compute_gauss_surf(grid, coord, radii, gauss_shift, gauss_scale) + elif surf_type == 'ses': + surf = compute_ses(grid, coord, radii, probe_radius) + else: + raise ValueError("Unknown surface type: " + str(surf_type)) + return surf + + IonSpecies = namedtuple("IonSpecies", "charge concentration radius") DEFAULT_ION_SPECIES = [IonSpecies(1.0, 0.1, 2.0), IonSpecies(-1.0, 0.1, 2.0)] @@ -380,7 +386,6 @@ def get_apbs_potential_from_mdtraj(traj, apbs_dir, pH, ion_species): else: raise ValueError("Neither apbs.pqr-PE0.dx nor apbs.pqr.dx were found in the apbs directory.") griddata = load_dx(dxfile, colname='DX') - griddata.struct = traj[0] return griddata def write_patches(df, out, column): diff --git a/surface_analyses/hydrophobic_potential.py b/surface_analyses/hydrophobic_potential.py index c42bf71..9ddc188 100644 --- a/surface_analyses/hydrophobic_potential.py +++ b/surface_analyses/hydrophobic_potential.py @@ -7,7 +7,7 @@ from skimage.measure import marching_cubes import numpy as np -from .surface import Surface, gaussian_grid, compute_ses +from .surface import gaussian_grid, compute_ses # Todo: don't use a cubic grid for hydrophobic_potential. Rectangular is more # efficient. diff --git a/surface_analyses/surface.py b/surface_analyses/surface.py index 5ee35d6..dff8ec0 100644 --- a/surface_analyses/surface.py +++ b/surface_analyses/surface.py @@ -34,7 +34,7 @@ def __init__(self, vertices, faces): Parameters ---------- vertices: np.ndarray, shape=(n_verts, 3) - vertices in Angstrom. Will be converted to nm, when writing a ply file. + vertices in nm. faces: np.ndarray, shape=(n_verts, 3) Triangles defined as 3 integer indices for vertices. """ @@ -89,11 +89,11 @@ def vertex_areas(self): def __repr__(self): return f"{self.__class__.__name__}({self.n_vertices} vertices, {self.n_faces} faces)" - def as_plydata(self, text=True, units_per_angstrom=0.01): + def as_plydata(self, text=True, units_per_angstrom=0.1): """Convert to a plyfile.PlyData object, while scaling coordinates - the default scaling units_per_angstrom=0.01 matches the PyMol CGO - scaling factor of 1:100. + the default scaling units_per_angstrom=0.1 matches the PyMol CGO + scaling factor of 1:100 and the nm -> A conversion. """ vertex_dtype = [("x", "f4"), ("y", "f4"), ("z", "f4")] keys = list(self.data.keys()) @@ -121,8 +121,9 @@ def as_plydata(self, text=True, units_per_angstrom=0.01): return out def write_ply(self, fname, coordinate_scaling=1.): + "Write a PLY file, scaling is relative to the default of 0.1." with open(fname, mode="wb") as f: - self.as_plydata(units_per_angstrom=coordinate_scaling*0.01).write(f) + self.as_plydata(units_per_angstrom=coordinate_scaling*0.1).write(f) @classmethod def from_plydata(cls, plydata): @@ -268,6 +269,11 @@ def compute_sas(grid, xyz, radii, solvent_radius=0.14): def compute_sas_msms(centers, radii, solvent_radius): + """Compute a solvent-accessible surface area in msms. + + SAS is not implemented in msms, but can be approximated using a very small + solvent radius. The inputs should be in nanometer (nm) units! + """ shifted_radii = np.array(radii) + solvent_radius tiny = 0.01 return compute_ses_msms(centers, shifted_radii, solvent_radius=tiny) @@ -342,7 +348,11 @@ def compute_ses_gisttools(grid, xyz, radii, solvent_radius=0.14): def compute_ses_msms(xyz, radii, solvent_radius=0.14, density=3.0): - """Compute a SES using MSMS.""" + """Compute a SES using MSMS. + + All inputs should be in nanometer (nm) units, except for the density, which + is passed to msms as-is. + """ # conversion nm -> Angstrom msms_out = msms.run_msms(xyz*10, radii*10, probe_radius=solvent_radius*10, density=density) vertices = msms_out.get_vertex_positions() / 10 @@ -376,7 +386,7 @@ def gaussian_grid(grid, xyz, sigma): def gaussian_grid_variable_sigma(grid, xyz, sigma, rmax=None): - """sum(exp(-dist(x, x_i)**2)/(2*sigma_i**2)) at each grid point x, where + """sum(exp((-dist(x, x_i)**2)/(2*sigma_i**2))) at each grid point x, where the sum is over every x_i out of xyz. Parameters diff --git a/test/commandline_electrostatic_test.py b/test/commandline_electrostatic_test.py index 55cf666..8ca709d 100644 --- a/test/commandline_electrostatic_test.py +++ b/test/commandline_electrostatic_test.py @@ -8,6 +8,7 @@ from pathlib import Path import os import shutil +from unittest.mock import patch import pandas as pd from pandas.testing import assert_frame_equal @@ -65,11 +66,11 @@ def with_or_without_cdrs(request): def test_trastuzumab_sas_integrals(with_or_without_cdrs): expected = np.array( [ - 25015.40424103, - 12573.01033872, - 29718.71768997, - -4703.31344894, - -1867.65861091, + 25.01540424103, + 12.57301033872, + 29.71871768997, + -4.70331344894, + -1.86765861091, ] ) if with_or_without_cdrs == 'with': @@ -96,7 +97,6 @@ def test_trastuzumab_sas_integrals(with_or_without_cdrs): expected_patches = pd.read_csv(str(TRASTUZUMAB_PATH / exp_fname)) if with_or_without_cdrs == 'without': expected_patches['cdr'] = False - print(expected_patches['cdr'].sum(), patches['cdr'].sum()) assert_frame_equal(patches, expected_patches) @@ -120,7 +120,7 @@ def test_trastuzumab_ply_out(): # check the number of vertices in the output with open(TRASTUZUMAB_PATH / 'apbs-potential.ply') as f: if msms.msms_available(): - assert get_nth_line(f, 2).strip() == "element vertex 43411" + assert get_nth_line(f, 2).strip() == "element vertex 40549" else: assert get_nth_line(f, 2).strip() == "element vertex 67252" diff --git a/test/trastuzumab/apbs-patches-msms.save b/test/trastuzumab/apbs-patches-msms.save index 1808595..231a753 100644 --- a/test/trastuzumab/apbs-patches-msms.save +++ b/test/trastuzumab/apbs-patches-msms.save @@ -1,34 +1,33 @@ type,npoints,area,cdr,main_residue -positive,1863,431.54070189258994,True,ARG59 -positive,1547,345.90682158468985,True,LYS166 -positive,509,116.7485765833226,False,LYS224 -positive,239,50.16736294916978,False,PRO14 -positive,224,44.41874485333689,False,GLN82 -positive,169,28.95821309534873,True,THR74 -positive,152,27.99561352816951,True,ARG187 -positive,129,23.216873083982687,True,GLU1 -positive,113,20.46684395168215,False,ARG182 -positive,90,17.829391566192037,False,GLY189 -positive,101,13.83507011562028,True,TYR170 -positive,29,4.595708916427887,False,THR195 -positive,10,2.6442337743944853,False,SER85 -positive,14,2.3280992398040787,False,THR69 -positive,23,2.097231570286438,True,PHE27 -positive,14,2.04419205063933,False,SER198 -positive,11,1.986901989790291,False,ARG87 -positive,17,1.285559272976008,False,SER197 -positive,9,1.1505531487664118,True,TRP47 -positive,3,0.38534614109958326,True,ARG145 -positive,6,0.37090140750726835,False,ARG182 -positive,7,0.04858950236157684,False,LYS166 -negative,259,65.43616275570122,True,ASP31 -negative,302,64.7879934150777,False,GLU89 -negative,32,7.416446479527371,False,ASP138 -negative,28,4.260605361192842,False,GLU226 -negative,23,2.6213427555188056,True,THR32 -negative,19,2.212451793625336,True,LEU175 -negative,21,2.0886264474594762,False,GLN13 -negative,8,1.0222647085405423,True,GLY100 -negative,3,0.6006055508090062,False,SER188 -negative,3,0.3172385767901459,True,GLY101 -negative,9,0.2869369854197508,False,GLU46 +positive,1725,4.182278508729034,True,ARG59 +positive,1345,3.3018772409740245,True,LYS166 +positive,474,1.1227447014770573,False,LYS224 +positive,212,0.4780537234772543,False,PRO14 +positive,196,0.42350239444585136,False,GLN82 +positive,153,0.26352678455568534,True,THR74 +positive,131,0.2623972855174914,True,ARG187 +positive,113,0.2239198468055985,True,GLU1 +positive,91,0.18047135288680483,False,ARG182 +positive,82,0.17243666156532797,False,ARG187 +positive,85,0.11763809617857156,True,TYR170 +positive,24,0.04135049908954891,False,THR195 +positive,10,0.027526301024288966,False,SER85 +positive,12,0.02167615367273435,False,SER198 +positive,12,0.019511131671895172,False,THR69 +positive,7,0.015586532451777404,False,ARG87 +positive,18,0.01556431292971975,True,PHE27 +positive,8,0.006207044981287775,True,PHE219 +positive,5,0.0058994152378983485,False,SER197 +positive,3,0.005203025153503235,False,ARG182 +positive,6,0.00519997560517816,False,ARG182 +positive,3,0.003164115750466868,True,ARG145 +positive,2,0.0025863522808405913,True,LYS30 +negative,241,0.6363343407004047,True,ASP31 +negative,268,0.6162607965834226,False,GLU89 +negative,27,0.07296024063999074,False,ASP138 +negative,25,0.04096139412400949,False,GLU226 +negative,23,0.022811577815085794,True,THR32 +negative,17,0.016368615194651954,True,LEU175 +negative,14,0.015925282524936708,False,GLN13 +negative,7,0.011160988364316836,True,GLY100 +negative,6,0.010733508585959266,False,ALA40 diff --git a/test/trastuzumab/apbs-patches.save b/test/trastuzumab/apbs-patches.save index a4960e0..4e97ca3 100644 --- a/test/trastuzumab/apbs-patches.save +++ b/test/trastuzumab/apbs-patches.save @@ -1,37 +1,37 @@ type,npoints,area,cdr,main_residue -positive,2706,405.5523245886125,True,ARG59 -positive,2050,313.58175296220804,True,LYS166 -positive,714,107.85564371527026,False,LYS224 -positive,269,43.49029152878992,False,PRO14 -positive,269,40.06015391376671,False,GLN82 -positive,164,24.193549860998854,True,ARG187 -positive,146,23.264461253442278,True,THR74 -positive,138,21.455028005128646,True,GLU1 -positive,120,17.16530442445461,False,ARG187 -positive,119,17.067557218893242,False,ARG182 -positive,66,10.186808113730878,True,TYR170 -positive,22,3.3606860217405483,False,THR195 -positive,30,3.0287956125102933,True,ALA79 -positive,15,2.3222784647950907,False,SER85 -positive,9,1.524505876004696,False,TYR60 -positive,14,1.1946246155227223,False,SER198 -positive,6,0.7587038942923148,True,PHE27 -positive,3,0.633904864701132,False,ARG87 -positive,2,0.4148370213806629,False,ARG139 -positive,1,0.11851314455270767,True,THR218 -positive,6,0.10143411136232316,False,LYS166 -positive,6,0.06065049464814365,False,ILE34 -positive,6,0.025959379854612052,True,LYS30 -positive,6,0.00643309680162929,False,LYS166 -positive,6,6.140272148513759e-06,True,TRP99 -negative,379,60.80976378557092,True,ASP31 -negative,375,56.702972870492886,False,GLU89 -negative,44,6.2794784369907575,False,ASP138 -negative,18,3.5186013849840183,False,GLU226 -negative,20,1.6015525545711475,True,THR32 -negative,5,0.9135120759407679,False,LEU11 -negative,5,0.7823746613382052,False,ALA40 -negative,3,0.4150233045220375,True,ASP108 -negative,5,0.31328256490329903,True,LEU175 -negative,4,0.1601275751988093,False,SER181 -negative,6,0.1596132023259997,False,ALA40 +positive,2706,4.0555237192411315,True,ARG59 +positive,2050,3.1358180610606072,True,LYS166 +positive,714,1.0785564850219775,False,LYS224 +positive,269,0.4349030355542176,False,PRO14 +positive,269,0.4006016732560038,False,GLN82 +positive,164,0.24193543018723437,True,ARG187 +positive,146,0.23264465711205656,True,THR74 +positive,138,0.2145502520054379,True,GLU1 +positive,120,0.1716530068706561,False,ARG187 +positive,119,0.1706755639411952,False,ARG182 +positive,66,0.10186810414633858,True,TYR170 +positive,22,0.033606791475904174,False,THR195 +positive,30,0.030287938567198577,True,ALA79 +positive,15,0.023222786999516153,False,SER85 +positive,9,0.015245096806514388,False,TYR60 +positive,14,0.011946238389706801,False,SER198 +positive,6,0.007587021401074405,True,PHE27 +positive,3,0.00633904083467011,False,ARG87 +positive,2,0.004148372012423352,False,ARG139 +positive,1,0.001185125942962865,True,THR218 +positive,6,0.0010143504841835236,False,LYS166 +positive,6,0.0006065153211238794,False,ILE34 +positive,6,0.00025960786297218874,True,LYS30 +positive,6,6.43316927835258e-05,False,LYS166 +positive,6,6.1429446418515e-08,True,TRP99 +negative,379,0.6080976480519841,True,ASP31 +negative,375,0.5670296154251784,False,GLU89 +negative,44,0.06279480835187692,False,ASP138 +negative,18,0.03518602688154715,False,GLU226 +negative,20,0.016015456201785128,True,THR32 +negative,5,0.00913510504324222,False,LEU11 +negative,5,0.0078237506319662,False,ALA40 +negative,3,0.004150241366005503,True,ASP108 +negative,5,0.003132833357085474,True,LEU175 +negative,4,0.0016012810156098565,False,SER181 +negative,6,0.001596131180122029,False,ALA40