Spaces:
Running
Running
import time | |
import gradio as gr | |
from gradio_molecule3d import Molecule3D | |
import sys | |
import os | |
import os | |
import numpy as np | |
from rdkit import Chem | |
from rdkit.Chem import AllChem | |
from rdkit.Chem import Draw | |
from rdkit.Chem.Draw import IPythonConsole | |
from rdkit.Chem import DataStructs | |
from rdkit.Chem import RDConfig | |
from rdkit.Chem import rdBase | |
import pickle | |
from Bio.PDB import * | |
from Bio import PDB | |
import requests | |
import subprocess | |
import mdtraj as md | |
from enspara import geometry | |
from sklearn.cluster import DBSCAN | |
import pandas as pd | |
def run_smina(
    ligand_path, protein_path, out_path, pocket_center, pocket_size, num_poses=1, exhaustiveness=1
):
    """
    Dock a ligand into a protein by invoking the bundled Smina binary.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        PDBQT file of the ligand to dock.
    protein_path: str or pathlib.Path
        PDBQT file of the receptor.
    out_path: str or pathlib.Path
        Destination for the docking poses (SDF or PDB).
    pocket_center: iterable of float or int
        x, y, z coordinates of the search-box centre.
    pocket_size: iterable of float or int
        x, y, z edge lengths of the search box.
    num_poses: int
        Upper bound on the number of poses written.
    exhaustiveness: int
        Search effort forwarded to Smina.

    Returns
    -------
    output_text: str
        Everything Smina printed to standard output.

    Raises
    ------
    subprocess.CalledProcessError
        If Smina exits with a non-zero status.
    """
    # The binary is resolved relative to the current working directory.
    command = ["./smina.static"]

    file_options = (
        ("--ligand", ligand_path),
        ("--receptor", protein_path),
        ("--out", out_path),
    )
    for flag, path in file_options:
        command += [flag, str(path)]

    # Box centre first, then box size; each is indexed x, y, z.
    box_options = (("--center_", pocket_center), ("--size_", pocket_size))
    for prefix, triple in box_options:
        for index, axis in enumerate("xyz"):
            command += [prefix + axis, str(triple[index])]

    command += ["--num_modes", str(num_poses)]
    command += ["--exhaustiveness", str(exhaustiveness)]

    # universal_newlines=True makes check_output return str instead of bytes.
    return subprocess.check_output(command, universal_newlines=True)
def _run_obabel(*args):
    """Run Open Babel with the given arguments; raise gr.Error on failure."""
    # An argument list (no shell) keeps uploaded file names that contain
    # spaces or shell metacharacters from being split or interpreted.
    try:
        subprocess.run(["obabel", *map(str, args)], check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        raise gr.Error(f"obabel conversion failed: {err}") from err


def _write_ligand_sdf(smiles, sdf_path):
    """Build a 3D, hydrogenated conformer from *smiles* and write it to *sdf_path*."""
    if not smiles or not smiles.strip():
        raise gr.Error("need ligand SMILES input")
    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None.
    if mol is None or mol.GetNumAtoms() == 0:
        raise gr.Error("could not parse ligand SMILES")
    mol = Chem.AddHs(mol)
    # EmbedMolecule returns a negative id when no conformer could be built;
    # optimising or writing such a molecule would fail later and less clearly.
    if AllChem.EmbedMolecule(mol) < 0:
        raise gr.Error("could not generate 3D coordinates for the ligand")
    AllChem.MMFFOptimizeMolecule(mol)
    writer = Chem.SDWriter(sdf_path)
    try:
        writer.write(mol)
    finally:
        # Close explicitly so the file is complete before obabel reads it.
        writer.close()


def _find_pocket_center(protein_file, pocket_pdb_path):
    """Return the centre of the largest DBSCAN cluster of LIGSITE pocket cells.

    The cluster is also written to *pocket_pdb_path* as pseudo-atoms.
    """
    structure = md.load(protein_file)
    # run ligsite
    pocket_cells = geometry.pockets.get_pocket_cells(struct=structure)
    if len(pocket_cells) == 0:
        raise gr.Error("no pocket found in the protein structure")

    # eps is presumably in the coordinate units of pocket_cells — TODO confirm
    labels = DBSCAN(eps=0.15, min_samples=5).fit_predict(pocket_cells)
    cluster_ids, cluster_sizes = np.unique(labels, return_counts=True)
    # DBSCAN labels noise points with -1; they do not form a cluster.
    is_cluster = cluster_ids != -1
    if not is_cluster.any():
        raise gr.Error("no binding pocket cluster found in the protein structure")
    # Pick the cluster with the most points.
    densest_label = cluster_ids[is_cluster][np.argmax(cluster_sizes[is_cluster])]
    densest_points = pocket_cells[labels == densest_label]

    # write cluster to PDB, one pseudo-atom per pocket cell
    atom_ids = list(range(densest_points.shape[0]))
    top_df = pd.DataFrame(
        {
            "serial": atom_ids,
            "name": "PK",
            "element": "H",
            "resSeq": atom_ids,
            "resName": "PCK",
            "chainID": 0,
        }
    )
    pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
    md.Trajectory(xyz=densest_points, topology=pocket_top).save(pocket_pdb_path)

    # NOTE(review): the centre is read back from the written PDB rather than
    # taken from densest_points; presumably this relies on mdtraj converting
    # its internal units when saving PDB — confirm before simplifying.
    pocket_structure = PDBParser().get_structure("X", pocket_pdb_path)
    coords = [atom.coord for atom in pocket_structure.get_atoms()]
    return np.mean(coords, axis=0)


def predict(input_sequence, input_ligand, input_protein, exhaustiveness):
    """
    Main prediction function that calls ligsite and smina.

    The ligand is built from SMILES, ligand and receptor are converted to
    PDBQT with obabel, a pocket centre is located, and smina docks the ligand
    into a 10 x 10 x 10 box around that centre.

    Parameters
    ----------
    input_sequence: str
        monomer sequence (accepted from the UI but not used by this function)
    input_ligand: str
        ligand as SMILES string
    input_protein: gradio.File
        Gradio file object to monomer protein structure as PDB
    exhaustiveness: int
        SMINA parameter; rounded to an integer of at least 1 before use

    Returns
    -------
    output_structures: list
        [path to input protein, path to docked ligand SDF]
    run_time: float
        run time of the program in seconds

    Raises
    ------
    gr.Error
        If an input is missing or invalid, or an external tool fails.
    """
    start_time = time.time()
    if input_protein is None:
        raise gr.Error("need pdb input")

    work_dir = "/usr/src/app"
    ligand_sdf = f"{work_dir}/ligand.sdf"
    ligand_pdbqt = f"{work_dir}/ligand.pdbqt"
    receptor_pdbqt = f"{work_dir}/receptor.pdbqt"
    pose_sdf = f"{work_dir}/docking_pose.sdf"

    _write_ligand_sdf(input_ligand, ligand_sdf)
    _run_obabel(input_protein.name, "-xr", "-O", receptor_pdbqt)
    _run_obabel("-isdf", ligand_sdf, "-O", ligand_pdbqt)

    # Find pocket
    pocket_center = _find_pocket_center(input_protein.name, f"{work_dir}/pockets_dense.pdb")

    # The value may arrive as a float from the UI; it is passed to smina as an
    # integer count, so normalise it here.
    exhaustiveness = max(1, int(round(exhaustiveness)))

    # run smina
    try:
        run_smina(
            ligand_pdbqt,
            receptor_pdbqt,
            pose_sdf,
            pocket_center,
            [10, 10, 10],
            exhaustiveness=exhaustiveness,
        )
    except subprocess.CalledProcessError as err:
        raise gr.Error(f"smina docking failed: {err}") from err

    run_time = time.time() - start_time
    return [input_protein.name, pose_sdf], run_time
# Gradio UI: three inputs (sequence, ligand SMILES, protein file), one option
# slider, a 3D viewer for the result and a runtime readout.
with gr.Blocks() as app:
    gr.Markdown("# LigSite + Smina")
    gr.Markdown("Example model using LigSite and DBScan to find a binding pocket in the protein and then SMINA to dock the ligand in the found pocket.")
    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
        input_protein = gr.File(label="Input protein monomer")

    # define any options here
    # for automated inference the default options are used
    # step=1 keeps the slider on whole numbers: the value is forwarded to
    # smina's --exhaustiveness option, which is documented in run_smina as int.
    exhaustiveness = gr.Slider(1, 10, value=1, step=1, label="Slider Option")
    # checkbox_option = gr.Checkbox(label="Checkbox Option")
    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

    btn = gr.Button("Run Inference")

    gr.Examples(
        [
            [
                "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
                "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
                "input_test.pdb",
            ],
        ],
        [input_sequence, input_ligand, input_protein],
    )

    # Viewer representations: model 0 is the first file returned by predict
    # (the protein), model 1 the second (the docked ligand).
    reps = [
        {
            "model": 0,
            "style": "cartoon",
            "color": "whiteCarbon",
        },
        {
            "model": 0,
            "resname": "UNK",
            "style": "stick",
            "color": "greenCarbon",
        },
        {
            "model": 0,
            "resname": "LIG",
            "style": "stick",
            "color": "greenCarbon",
        },
        {
            "model": 1,
            "style": "stick",
            "color": "greenCarbon",
        },
    ]

    out = Molecule3D(reps=reps)
    run_time = gr.Textbox(label="Runtime")

    btn.click(
        predict,
        inputs=[input_sequence, input_ligand, input_protein, exhaustiveness],
        outputs=[out, run_time],
    )

app.launch()