Skip to content

Commit 5c5bb80

Browse files
WIP Use PDBInf
1 parent 5ab9ecc commit 5c5bb80

File tree

3 files changed

+24
-134
lines changed

3 files changed

+24
-134
lines changed

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- openff-toolkit >=0.13.0
1010
- openff-units >=0.2.0
1111
- openff-models >=0.0.5
12+
- pdbinf
1213
- pip
1314
- pydantic
1415
- pytest

gufe/components/proteincomponent.py

Lines changed: 23 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,11 @@
11
# This code is part of OpenFE and is licensed under the MIT license.
22
# For details, see https://github.com/OpenFreeEnergy/gufe
3-
import ast
4-
import json
53
import io
64
import numpy as np
75
from os import PathLike
8-
from typing import Union, Optional
9-
from collections import defaultdict
6+
from typing import Union
107

11-
from openmm import app
12-
from openmm import unit as omm_unit
8+
import pdbinf
139

1410
from rdkit import Chem
1511
from rdkit.Chem.rdchem import Mol, Atom, Conformer, EditableMol, BondType
@@ -51,10 +47,6 @@
5147
}
5248

5349

54-
negative_ions = ["F", "CL", "Br", "I"]
55-
positive_ions = ["NA", "MG", "ZN"]
56-
57-
5850
class ProteinComponent(ExplicitMoleculeComponent):
5951
"""Wrapper around a Protein representation.
6052
@@ -82,7 +74,6 @@ def __init__(self, rdkit: RDKitMol, name=""):
8274
"Consider loading via rdkit.Chem.MolFromPDBFile or similar.")
8375
super().__init__(rdkit=rdkit, name=name)
8476

85-
# FROM
8677
@classmethod
8778
def from_pdb_file(cls, pdb_file: str, name: str = ""):
8879
"""
@@ -100,9 +91,10 @@ def from_pdb_file(cls, pdb_file: str, name: str = ""):
10091
ProteinComponent
10192
the deserialized molecule
10293
"""
103-
openmm_PDBFile = PDBFile(pdb_file)
104-
return cls._from_openmmPDBFile(
105-
openmm_PDBFile=openmm_PDBFile, name=name
94+
return cls(
95+
rdkit=pdbinf.load_pdb_file(pdb_file,
96+
templates=[pdbinf.STANDARD_AA_DOC]),
97+
name=name,
10698
)
10799

108100
@classmethod
@@ -112,7 +104,7 @@ def from_pdbx_file(cls, pdbx_file: str, name=""):
112104
113105
Parameters
114106
----------
115-
pdbxfile : str
107+
pdbx_file : str
116108
path to the pdb file.
117109
name : str, optional
118110
name of the input protein, by default ""
@@ -122,117 +114,12 @@ def from_pdbx_file(cls, pdbx_file: str, name=""):
122114
ProteinComponent
123115
the deserialized molecule
124116
"""
125-
openmm_PDBxFile = PDBxFile(pdbx_file)
126-
return cls._from_openmmPDBFile(
127-
openmm_PDBFile=openmm_PDBxFile, name=name
117+
return cls(
118+
rdkit=pdbinf.load_pdbx_file(pdbx_file,
119+
templates=[pdbinf.STANDARD_AA_DOC]),
120+
name=name,
128121
)
129122

130-
@classmethod
131-
def _from_openmmPDBFile(cls, openmm_PDBFile: Union[PDBFile, PDBxFile],
132-
name: str = ""):
133-
"""Converts to our internal representation (rdkit Mol)
134-
135-
Parameters
136-
----------
137-
openmm_PDBFile : PDBFile or PDBxFile
138-
object of the protein
139-
name : str
140-
name of the protein
141-
142-
Returns
143-
-------
144-
ProteinComponent
145-
the deserialized molecule
146-
"""
147-
periodicTable = Chem.GetPeriodicTable()
148-
mol_topology = openmm_PDBFile.getTopology()
149-
150-
rd_mol = Mol()
151-
editable_rdmol = EditableMol(rd_mol)
152-
153-
# Add Atoms
154-
for atom in mol_topology.atoms():
155-
a = Atom(atom.element.atomic_number)
156-
157-
atom_monomerInfo = Chem.AtomPDBResidueInfo()
158-
atom_monomerInfo.SetChainId(atom.residue.chain.id)
159-
atom_monomerInfo.SetSerialNumber(int(atom.id))
160-
atom_monomerInfo.SetSegmentNumber(int(atom.residue.chain.index))
161-
atom_monomerInfo.SetInsertionCode(atom.residue.insertionCode)
162-
atom_monomerInfo.SetName(atom.name)
163-
atom_monomerInfo.SetResidueName(atom.residue.name)
164-
atom_monomerInfo.SetResidueNumber(int(atom.residue.id))
165-
atom_monomerInfo.SetIsHeteroAtom(False) # TODO: Do hetatoms
166-
167-
a.SetMonomerInfo(atom_monomerInfo)
168-
169-
# additonally possible:
170-
# atom_monomerInfo.SetSecondaryStructure
171-
# atom_monomerInfo.SetMonomerType
172-
# atom_monomerInfo.SetAltLoc
173-
174-
editable_rdmol.AddAtom(a)
175-
176-
# Add Bonds
177-
for bond in mol_topology.bonds():
178-
bond_order = _BONDORDERS_OPENMM_TO_RDKIT[bond.order]
179-
editable_rdmol.AddBond(
180-
beginAtomIdx=bond.atom1.index,
181-
endAtomIdx=bond.atom2.index,
182-
order=bond_order,
183-
)
184-
185-
# Set Positions
186-
rd_mol = editable_rdmol.GetMol()
187-
positions = np.array(
188-
openmm_PDBFile.positions.value_in_unit(omm_unit.angstrom), ndmin=3
189-
)
190-
191-
for frame_id, frame in enumerate(positions):
192-
conf = Conformer(frame_id)
193-
for atom_id, atom_pos in enumerate(frame):
194-
conf.SetAtomPosition(atom_id, atom_pos)
195-
rd_mol.AddConformer(conf)
196-
197-
# Add Additionals
198-
# Formal Charge
199-
netcharge = 0
200-
for a in rd_mol.GetAtoms():
201-
atomic_num = a.GetAtomicNum()
202-
atom_name = a.GetMonomerInfo().GetName()
203-
204-
connectivity = sum(
205-
int(bond.GetBondType()) for bond in a.GetBonds()
206-
)
207-
default_valence = periodicTable.GetDefaultValence(atomic_num)
208-
209-
if connectivity == 0: # ions:
210-
if atom_name in positive_ions:
211-
fc = default_valence # e.g. Sodium ions
212-
elif atom_name in negative_ions:
213-
fc = - default_valence # e.g. Chlorine ions
214-
else: # -no-cov-
215-
resn = a.GetMonomerInfo().GetResidueName()
216-
resind = int(a.GetMonomerInfo().GetResidueNumber())
217-
raise ValueError(
218-
"I don't know this Ion or something really went "
219-
f"wrong! \t{atom_name}\t{resn}\t-{resind}\t"
220-
f"connectivity{connectivity}"
221-
)
222-
elif default_valence > connectivity:
223-
fc = - (default_valence - connectivity) # negative charge
224-
elif default_valence < connectivity:
225-
fc = + (connectivity - default_valence) # positive charge
226-
else:
227-
fc = 0 # neutral
228-
229-
a.SetFormalCharge(fc)
230-
a.UpdatePropertyCache(strict=True)
231-
232-
netcharge += fc
233-
234-
return cls(rdkit=rd_mol, name=name)
235-
236123
@classmethod
237124
def _from_dict(cls, ser_dict: dict, name: str = ""):
238125
"""Deserialize from dict representation"""
@@ -294,15 +181,16 @@ def _from_dict(cls, ser_dict: dict, name: str = ""):
294181

295182
return cls(rdkit=rd_mol, name=name)
296183

297-
# TO
298-
def to_openmm_topology(self) -> app.Topology:
184+
def to_openmm_topology(self) -> "app.Topology":
299185
"""Convert to an openmm Topology object
300186
301187
Returns
302188
-------
303189
openmm.app.Topology
304190
resulting topology obj.
305191
"""
192+
from openmm import app
193+
306194
def reskey(m):
307195
"""key for defining when a residue has changed from previous
308196
@@ -370,16 +258,18 @@ def chainkey(m):
370258

371259
return top
372260

373-
def to_openmm_positions(self) -> omm_unit.Quantity:
374-
"""
375-
serialize the positions to openmm.unit.Quantity
261+
def to_openmm_positions(self) -> "openmm.app.unit.Quantity":
262+
"""serialize the positions to openmm.unit.Quantity
263+
376264
! only one frame at the moment!
377265
378266
Returns
379267
-------
380268
omm_unit.Quantity
381269
Quantity containing protein atom positions
382270
"""
271+
from openmm import unit as omm_unit
272+
383273
np_pos = deserialize_numpy(self.to_dict()["conformers"][0])
384274
openmm_pos = (
385275
list(map(lambda x: np.array(x), np_pos)) * omm_unit.angstrom
@@ -388,8 +278,7 @@ def to_openmm_positions(self) -> omm_unit.Quantity:
388278
return openmm_pos
389279

390280
def to_pdb_file(self, out_path: Union[str, bytes, PathLike[str], PathLike[bytes], io.TextIOBase]) -> str:
391-
"""
392-
serialize protein to pdb file.
281+
"""Write protein to pdb file.
393282
394283
Parameters
395284
----------
@@ -434,8 +323,7 @@ def to_pdb_file(self, out_path: Union[str, bytes, PathLike[str], PathLike[bytes]
434323
def to_pdbx_file(
435324
self, out_path: Union[str, bytes, PathLike[str], PathLike[bytes], io.TextIOBase]
436325
) -> str:
437-
"""
438-
serialize protein to pdbx file.
326+
"""Write protein to pdbx file.
439327
440328
Parameters
441329
----------
@@ -447,6 +335,8 @@ def to_pdbx_file(
447335
str
448336
string path to the resulting pdbx.
449337
"""
338+
from openmm import unit as omm_unit
339+
450340
# get top:
451341
top = self.to_openmm_topology()
452342

@@ -472,7 +362,6 @@ def to_pdbx_file(
472362

473363
PDBxFile.writeFile(topology=top, positions=openmm_pos, file=out_file)
474364

475-
476365
if must_close:
477366
# we only close the file if we had to open it
478367
out_file.close()

setup.cfg

Whitespace-only changes.

0 commit comments

Comments
 (0)