From 3bb8e27558014503ff0052669b2d88be897e10fd Mon Sep 17 00:00:00 2001 From: Franz Waibl Date: Fri, 17 Jan 2025 14:23:17 +0100 Subject: [PATCH 1/2] Change units in electrostatics script to nm This also changes the created surface somewhat, because the msms resolution is not equivalent. Overall, the results are similar and the created surface looks somewhat smoother. --- surface_analyses/commandline_electrostatic.py | 47 ++++++++------ surface_analyses/hydrophobic_potential.py | 2 +- surface_analyses/surface.py | 24 +++++-- test/commandline_electrostatic_test.py | 14 ++-- test/trastuzumab/apbs-patches-msms.save | 65 +++++++++---------- 5 files changed, 83 insertions(+), 69 deletions(-) diff --git a/surface_analyses/commandline_electrostatic.py b/surface_analyses/commandline_electrostatic.py index da5a3ae..a0f6b0a 100644 --- a/surface_analyses/commandline_electrostatic.py +++ b/surface_analyses/commandline_electrostatic.py @@ -53,7 +53,7 @@ def parse_args(argv=None): add_trajectory_options_to_parser(parser) parser.add_argument('--dx', type=str, default=None, nargs='?', help="Optional dx file with the electrostatic potential. If this is omitted, you must specify --apbs_dir") parser.add_argument('--apbs_dir', help="Directory in which intermediate files are stored when running APBS. Will be created if it does not exist.", type=str, default=None) - parser.add_argument('--probe_radius', type=float, help='probe radius in Angstrom', default=1.4) + parser.add_argument('--probe_radius', type=float, help='Probe radius in nm', default=0.14) parser.add_argument('-o', '--out', default=None, type=str, help='Output csv file.') parser.add_argument( '-c', '--patch_cutoff', @@ -146,7 +146,7 @@ def run_electrostatics( traj: md.Trajectory, dx: str = None, apbs_dir: str = None, - probe_radius: float = 1.4, + probe_radius: float = 0.14, out: str = None, patch_cutoff: tuple = (2., -2.), integral_cutoff: tuple = (0.3, -0.3), @@ -191,31 +191,23 @@ def run_electrostatics( if dx is not None: griddata = load_dx(dx, colname='DX') - griddata.struct = traj[0] else: griddata = get_apbs_potential_from_mdtraj(traj, apbs_dir, pH, ion_species) - - # *10 because mdtraj calculates stuff in nanometers instead of Angstrom. - radii = 10. * np.array([atom.element.radius for atom in traj.top.atoms]) + grid = griddata.grid.scale(0.1) columns = ['DX'] print('Run info:') pprint.pprint({ '#Atoms': traj.n_atoms, - 'Grid dimensions': griddata.grid.shape, + 'Grid dimensions': grid.shape, # **kwargs, }) - print('Calculating triangulated SASA') + radii = np.array([atom.element.radius for atom in traj.top.atoms]) + coord = traj.xyz[0] - if surface_type == 'sas': - surf = compute_sas(griddata.grid, griddata.coord, radii, probe_radius) - elif surface_type == 'gauss': - surf = compute_gauss_surf(griddata.grid, griddata.coord, radii, gauss_shift, gauss_scale) - elif surface_type == 'ses': - surf = compute_ses(griddata.grid, griddata.coord, radii, probe_radius) - else: - raise ValueError("Unknown surface type: " + str(surface_type)) + print('Calculating triangulated SASA') + surf = calculate_surface(surface_type, grid, coord, radii, probe_radius, gauss_shift, gauss_scale) if check_cdrs: try: @@ -234,7 +226,7 @@ def run_electrostatics( cdrs = [] residues = np.array([str(a.residue) for a in traj.top.atoms]) - pdbtree = cKDTree(traj.xyz[0] * 10.) + pdbtree = cKDTree(coord) _, closest_atom = pdbtree.query(surf.vertices) # Calculate the area of each triangle, and split evenly among the vertices. @@ -245,7 +237,8 @@ def run_electrostatics( # The patch searching print('Finding patches') - values = griddata.interpolate(columns, surf.vertices)[columns[0]] + # the griddata object is in Angstroms + values = griddata.interpolate(columns, surf.vertices * 10)[columns[0]] patches = pd.DataFrame({ 'positive': assign_patches(surf.faces, values > patch_cutoff[0]), 'negative': assign_patches(surf.faces, values < patch_cutoff[1]), @@ -286,10 +279,10 @@ def run_electrostatics( write_patches(patches, output, 'negative') # Compute the total solvent-accessible potential. - within_range, closest_atom, distance = griddata.distance_to_spheres(rmax=10, atomic_radii=radii) + within_range, closest_atom, distance = grid.distance_to_spheres(centers=traj.xyz[0], rmax=1., radii=radii) not_protein = distance > probe_radius accessible = within_range[not_protein] - voxel_volume = griddata.grid.voxel_volume + voxel_volume = grid.voxel_volume accessible_data = griddata[columns[0]].values[accessible] integral = np.sum(accessible_data) * voxel_volume integral_high = np.sum(np.maximum(accessible_data - integral_cutoff[0], 0)) * voxel_volume @@ -334,6 +327,19 @@ def run_electrostatics( } +def calculate_surface(surf_type, grid, coord, radii, probe_radius, gauss_shift, gauss_scale) -> Surface: + """Calculate a surface. Note: all inputs should be in nm.""" + if surf_type == 'sas': + surf = compute_sas(grid, coord, radii, probe_radius) + elif surf_type == 'gauss': + surf = compute_gauss_surf(grid, coord, radii, gauss_shift, gauss_scale) + elif surf_type == 'ses': + surf = compute_ses(grid, coord, radii, probe_radius) + else: + raise ValueError("Unknown surface type: " + str(surf_type)) + return surf + + IonSpecies = namedtuple("IonSpecies", "charge concentration radius") DEFAULT_ION_SPECIES = [IonSpecies(1.0, 0.1, 2.0), IonSpecies(-1.0, 0.1, 2.0)] @@ -380,7 +386,6 @@ def get_apbs_potential_from_mdtraj(traj, apbs_dir, pH, ion_species): else: raise ValueError("Neither apbs.pqr-PE0.dx nor apbs.pqr.dx were found in the apbs directory.") griddata = load_dx(dxfile, colname='DX') - griddata.struct = traj[0] return griddata def write_patches(df, out, column): diff --git a/surface_analyses/hydrophobic_potential.py b/surface_analyses/hydrophobic_potential.py index c42bf71..9ddc188 100644 --- a/surface_analyses/hydrophobic_potential.py +++ b/surface_analyses/hydrophobic_potential.py @@ -7,7 +7,7 @@ from skimage.measure import marching_cubes import numpy as np -from .surface import Surface, gaussian_grid, compute_ses +from .surface import gaussian_grid, compute_ses # Todo: don't use a cubic grid for hydrophobic_potential. Rectangular is more # efficient. diff --git a/surface_analyses/surface.py b/surface_analyses/surface.py index 5ee35d6..dff8ec0 100644 --- a/surface_analyses/surface.py +++ b/surface_analyses/surface.py @@ -34,7 +34,7 @@ def __init__(self, vertices, faces): Parameters ---------- vertices: np.ndarray, shape=(n_verts, 3) - vertices in Angstrom. Will be converted to nm, when writing a ply file. + vertices in nm. faces: np.ndarray, shape=(n_verts, 3) Triangles defined as 3 integer indices for vertices. """ @@ -89,11 +89,11 @@ def vertex_areas(self): def __repr__(self): return f"{self.__class__.__name__}({self.n_vertices} vertices, {self.n_faces} faces)" - def as_plydata(self, text=True, units_per_angstrom=0.01): + def as_plydata(self, text=True, units_per_angstrom=0.1): """Convert to a plyfile.PlyData object, while scaling coordinates - the default scaling units_per_angstrom=0.01 matches the PyMol CGO - scaling factor of 1:100. + the default scaling units_per_angstrom=0.1 matches the PyMol CGO + scaling factor of 1:100 and the nm -> A conversion. """ vertex_dtype = [("x", "f4"), ("y", "f4"), ("z", "f4")] keys = list(self.data.keys()) @@ -121,8 +121,9 @@ def as_plydata(self, text=True, units_per_angstrom=0.01): return out def write_ply(self, fname, coordinate_scaling=1.): + "Write a PLY file, scaling is relative to the default of 0.1." with open(fname, mode="wb") as f: - self.as_plydata(units_per_angstrom=coordinate_scaling*0.01).write(f) + self.as_plydata(units_per_angstrom=coordinate_scaling*0.1).write(f) @classmethod def from_plydata(cls, plydata): @@ -268,6 +269,11 @@ def compute_sas(grid, xyz, radii, solvent_radius=0.14): def compute_sas_msms(centers, radii, solvent_radius): + """Compute a solvent-accessible surface area in msms. + + SAS is not implemented in msms, but can be approximated using a very small + solvent radius. The inputs should be in nanometer (nm) units! + """ shifted_radii = np.array(radii) + solvent_radius tiny = 0.01 return compute_ses_msms(centers, shifted_radii, solvent_radius=tiny) @@ -342,7 +348,11 @@ def compute_ses_gisttools(grid, xyz, radii, solvent_radius=0.14): def compute_ses_msms(xyz, radii, solvent_radius=0.14, density=3.0): - """Compute a SES using MSMS.""" + """Compute a SES using MSMS. + + All inputs should be in nanometer (nm) units, except for the density, which + is passed to msms as-is. + """ # conversion nm -> Angstrom msms_out = msms.run_msms(xyz*10, radii*10, probe_radius=solvent_radius*10, density=density) vertices = msms_out.get_vertex_positions() / 10 @@ -376,7 +386,7 @@ def gaussian_grid(grid, xyz, sigma): def gaussian_grid_variable_sigma(grid, xyz, sigma, rmax=None): - """sum(exp(-dist(x, x_i)**2)/(2*sigma_i**2)) at each grid point x, where + """sum(exp((-dist(x, x_i)**2)/(2*sigma_i**2))) at each grid point x, where the sum is over every x_i out of xyz. Parameters diff --git a/test/commandline_electrostatic_test.py b/test/commandline_electrostatic_test.py index 55cf666..8ca709d 100644 --- a/test/commandline_electrostatic_test.py +++ b/test/commandline_electrostatic_test.py @@ -8,6 +8,7 @@ from pathlib import Path import os import shutil +from unittest.mock import patch import pandas as pd from pandas.testing import assert_frame_equal @@ -65,11 +66,11 @@ def with_or_without_cdrs(request): def test_trastuzumab_sas_integrals(with_or_without_cdrs): expected = np.array( [ - 25015.40424103, - 12573.01033872, - 29718.71768997, - -4703.31344894, - -1867.65861091, + 25.01540424103, + 12.57301033872, + 29.71871768997, + -4.70331344894, + -1.86765861091, ] ) if with_or_without_cdrs == 'with': @@ -96,7 +97,6 @@ def test_trastuzumab_sas_integrals(with_or_without_cdrs): expected_patches = pd.read_csv(str(TRASTUZUMAB_PATH / exp_fname)) if with_or_without_cdrs == 'without': expected_patches['cdr'] = False - print(expected_patches['cdr'].sum(), patches['cdr'].sum()) assert_frame_equal(patches, expected_patches) @@ -120,7 +120,7 @@ def test_trastuzumab_ply_out(): # check the number of vertices in the output with open(TRASTUZUMAB_PATH / 'apbs-potential.ply') as f: if msms.msms_available(): - assert get_nth_line(f, 2).strip() == "element vertex 43411" + assert get_nth_line(f, 2).strip() == "element vertex 40549" else: assert get_nth_line(f, 2).strip() == "element vertex 67252" diff --git a/test/trastuzumab/apbs-patches-msms.save b/test/trastuzumab/apbs-patches-msms.save index 1808595..231a753 100644 --- a/test/trastuzumab/apbs-patches-msms.save +++ b/test/trastuzumab/apbs-patches-msms.save @@ -1,34 +1,33 @@ type,npoints,area,cdr,main_residue -positive,1863,431.54070189258994,True,ARG59 -positive,1547,345.90682158468985,True,LYS166 -positive,509,116.7485765833226,False,LYS224 -positive,239,50.16736294916978,False,PRO14 -positive,224,44.41874485333689,False,GLN82 -positive,169,28.95821309534873,True,THR74 -positive,152,27.99561352816951,True,ARG187 -positive,129,23.216873083982687,True,GLU1 -positive,113,20.46684395168215,False,ARG182 -positive,90,17.829391566192037,False,GLY189 -positive,101,13.83507011562028,True,TYR170 -positive,29,4.595708916427887,False,THR195 -positive,10,2.6442337743944853,False,SER85 -positive,14,2.3280992398040787,False,THR69 -positive,23,2.097231570286438,True,PHE27 -positive,14,2.04419205063933,False,SER198 -positive,11,1.986901989790291,False,ARG87 -positive,17,1.285559272976008,False,SER197 -positive,9,1.1505531487664118,True,TRP47 -positive,3,0.38534614109958326,True,ARG145 -positive,6,0.37090140750726835,False,ARG182 -positive,7,0.04858950236157684,False,LYS166 -negative,259,65.43616275570122,True,ASP31 -negative,302,64.7879934150777,False,GLU89 -negative,32,7.416446479527371,False,ASP138 -negative,28,4.260605361192842,False,GLU226 -negative,23,2.6213427555188056,True,THR32 -negative,19,2.212451793625336,True,LEU175 -negative,21,2.0886264474594762,False,GLN13 -negative,8,1.0222647085405423,True,GLY100 -negative,3,0.6006055508090062,False,SER188 -negative,3,0.3172385767901459,True,GLY101 -negative,9,0.2869369854197508,False,GLU46 +positive,1725,4.182278508729034,True,ARG59 +positive,1345,3.3018772409740245,True,LYS166 +positive,474,1.1227447014770573,False,LYS224 +positive,212,0.4780537234772543,False,PRO14 +positive,196,0.42350239444585136,False,GLN82 +positive,153,0.26352678455568534,True,THR74 +positive,131,0.2623972855174914,True,ARG187 +positive,113,0.2239198468055985,True,GLU1 +positive,91,0.18047135288680483,False,ARG182 +positive,82,0.17243666156532797,False,ARG187 +positive,85,0.11763809617857156,True,TYR170 +positive,24,0.04135049908954891,False,THR195 +positive,10,0.027526301024288966,False,SER85 +positive,12,0.02167615367273435,False,SER198 +positive,12,0.019511131671895172,False,THR69 +positive,7,0.015586532451777404,False,ARG87 +positive,18,0.01556431292971975,True,PHE27 +positive,8,0.006207044981287775,True,PHE219 +positive,5,0.0058994152378983485,False,SER197 +positive,3,0.005203025153503235,False,ARG182 +positive,6,0.00519997560517816,False,ARG182 +positive,3,0.003164115750466868,True,ARG145 +positive,2,0.0025863522808405913,True,LYS30 +negative,241,0.6363343407004047,True,ASP31 +negative,268,0.6162607965834226,False,GLU89 +negative,27,0.07296024063999074,False,ASP138 +negative,25,0.04096139412400949,False,GLU226 +negative,23,0.022811577815085794,True,THR32 +negative,17,0.016368615194651954,True,LEU175 +negative,14,0.015925282524936708,False,GLN13 +negative,7,0.011160988364316836,True,GLY100 +negative,6,0.010733508585959266,False,ALA40 From ee62de582c8384428e78bb415fe57b1a70933934 Mon Sep 17 00:00:00 2001 From: Franz Waibl Date: Fri, 17 Jan 2025 14:32:11 +0100 Subject: [PATCH 2/2] Update expected results when msms is not available --- test/trastuzumab/apbs-patches.save | 72 +++++++++++++++--------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/test/trastuzumab/apbs-patches.save b/test/trastuzumab/apbs-patches.save index a4960e0..4e97ca3 100644 --- a/test/trastuzumab/apbs-patches.save +++ b/test/trastuzumab/apbs-patches.save @@ -1,37 +1,37 @@ type,npoints,area,cdr,main_residue -positive,2706,405.5523245886125,True,ARG59 -positive,2050,313.58175296220804,True,LYS166 -positive,714,107.85564371527026,False,LYS224 -positive,269,43.49029152878992,False,PRO14 -positive,269,40.06015391376671,False,GLN82 -positive,164,24.193549860998854,True,ARG187 -positive,146,23.264461253442278,True,THR74 -positive,138,21.455028005128646,True,GLU1 -positive,120,17.16530442445461,False,ARG187 -positive,119,17.067557218893242,False,ARG182 -positive,66,10.186808113730878,True,TYR170 -positive,22,3.3606860217405483,False,THR195 -positive,30,3.0287956125102933,True,ALA79 -positive,15,2.3222784647950907,False,SER85 -positive,9,1.524505876004696,False,TYR60 -positive,14,1.1946246155227223,False,SER198 -positive,6,0.7587038942923148,True,PHE27 -positive,3,0.633904864701132,False,ARG87 -positive,2,0.4148370213806629,False,ARG139 -positive,1,0.11851314455270767,True,THR218 -positive,6,0.10143411136232316,False,LYS166 -positive,6,0.06065049464814365,False,ILE34 -positive,6,0.025959379854612052,True,LYS30 -positive,6,0.00643309680162929,False,LYS166 -positive,6,6.140272148513759e-06,True,TRP99 -negative,379,60.80976378557092,True,ASP31 -negative,375,56.702972870492886,False,GLU89 -negative,44,6.2794784369907575,False,ASP138 -negative,18,3.5186013849840183,False,GLU226 -negative,20,1.6015525545711475,True,THR32 -negative,5,0.9135120759407679,False,LEU11 -negative,5,0.7823746613382052,False,ALA40 -negative,3,0.4150233045220375,True,ASP108 -negative,5,0.31328256490329903,True,LEU175 -negative,4,0.1601275751988093,False,SER181 -negative,6,0.1596132023259997,False,ALA40 +positive,2706,4.0555237192411315,True,ARG59 +positive,2050,3.1358180610606072,True,LYS166 +positive,714,1.0785564850219775,False,LYS224 +positive,269,0.4349030355542176,False,PRO14 +positive,269,0.4006016732560038,False,GLN82 +positive,164,0.24193543018723437,True,ARG187 +positive,146,0.23264465711205656,True,THR74 +positive,138,0.2145502520054379,True,GLU1 +positive,120,0.1716530068706561,False,ARG187 +positive,119,0.1706755639411952,False,ARG182 +positive,66,0.10186810414633858,True,TYR170 +positive,22,0.033606791475904174,False,THR195 +positive,30,0.030287938567198577,True,ALA79 +positive,15,0.023222786999516153,False,SER85 +positive,9,0.015245096806514388,False,TYR60 +positive,14,0.011946238389706801,False,SER198 +positive,6,0.007587021401074405,True,PHE27 +positive,3,0.00633904083467011,False,ARG87 +positive,2,0.004148372012423352,False,ARG139 +positive,1,0.001185125942962865,True,THR218 +positive,6,0.0010143504841835236,False,LYS166 +positive,6,0.0006065153211238794,False,ILE34 +positive,6,0.00025960786297218874,True,LYS30 +positive,6,6.43316927835258e-05,False,LYS166 +positive,6,6.1429446418515e-08,True,TRP99 +negative,379,0.6080976480519841,True,ASP31 +negative,375,0.5670296154251784,False,GLU89 +negative,44,0.06279480835187692,False,ASP138 +negative,18,0.03518602688154715,False,GLU226 +negative,20,0.016015456201785128,True,THR32 +negative,5,0.00913510504324222,False,LEU11 +negative,5,0.0078237506319662,False,ALA40 +negative,3,0.004150241366005503,True,ASP108 +negative,5,0.003132833357085474,True,LEU175 +negative,4,0.0016012810156098565,False,SER181 +negative,6,0.001596131180122029,False,ALA40