demo_plinder_smina

Running

App Files Files Community

demo_plinder_smina / inference_app.py

simonduerr

Update inference_app.py

ff1a3bf verified 4 days ago

raw

history blame

No virus

7.12 kB


	import time

	import gradio as gr

	from gradio_molecule3d import Molecule3D

	import sys
	import os
	import os
	import numpy as np
	from rdkit import Chem
	from rdkit.Chem import AllChem
	from rdkit.Chem import Draw
	from rdkit.Chem.Draw import IPythonConsole
	from rdkit.Chem import DataStructs
	from rdkit.Chem import RDConfig
	from rdkit.Chem import rdBase
	import pickle

	from Bio.PDB import *
	from Bio import PDB
	import requests
	import subprocess

	import mdtraj as md
	from enspara import geometry
	from sklearn.cluster import DBSCAN
	import pandas as pd


	def run_smina(
	    ligand_path, protein_path, out_path, pocket_center, pocket_size, num_poses=1, exhaustiveness=1
	):
	    """
	    Perform docking with Smina.

	    The ``./smina.static`` executable is resolved relative to the current
	    working directory of the process.

	    Parameters
	    ----------
	    ligand_path: str or pathlib.Path
	        Path to ligand PDBQT file that should be docked.
	    protein_path: str or pathlib.Path
	        Path to protein PDBQT file that should be docked to.
	    out_path: str or pathlib.Path
	        Path to which docking poses should be saved, SDF or PDB format.
	    pocket_center: iterable of float or int
	        Coordinates defining the center of the binding site (x, y, z).
	    pocket_size: iterable of float or int
	        Lengths of edges defining the binding site (x, y, z).
	    num_poses: int
	        Maximum number of poses to generate. Coerced with ``int()``.
	    exhaustiveness: int
	        Accuracy of docking calculations. Coerced with ``int()``.

	    Returns
	    -------
	    output_text: str
	        The output of the Smina calculation.

	    Raises
	    ------
	    subprocess.CalledProcessError
	        If the Smina process exits with a non-zero status.
	    """
	    command = [
	        "./smina.static",
	        "--ligand",
	        str(ligand_path),
	        "--receptor",
	        str(protein_path),
	        "--out",
	        str(out_path),
	    ]
	    # Explicit indexing (rather than zip) keeps the IndexError for inputs
	    # that have fewer than three components.
	    for axis_index, axis_name in enumerate("xyz"):
	        command += [f"--center_{axis_name}", str(pocket_center[axis_index])]
	    for axis_index, axis_name in enumerate("xyz"):
	        command += [f"--size_{axis_name}", str(pocket_size[axis_index])]
	    # Both options are integers on the command line; a UI slider may hand us
	    # a float (e.g. 3.0 or 3.47), which would otherwise be passed verbatim.
	    command += [
	        "--num_modes",
	        str(int(num_poses)),
	        "--exhaustiveness",
	        str(int(exhaustiveness)),
	    ]
	    output_text = subprocess.check_output(
	        command,
	        universal_newlines=True,  # needed to capture output text
	    )
	    return output_text

	def _write_ligand_sdf(smiles, sdf_path):
	    """Build a hydrogenated 3D conformer for *smiles* and write it to *sdf_path*."""
	    if not smiles or not smiles.strip():
	        raise gr.Error("need ligand SMILES input")
	    mol = Chem.MolFromSmiles(smiles)
	    if mol is None:
	        # RDKit signals an unparsable SMILES by returning None.
	        raise gr.Error("could not parse ligand SMILES")

	    mol_with_hs = Chem.AddHs(mol)
	    # EmbedMolecule returns -1 when no conformer could be generated.
	    if AllChem.EmbedMolecule(mol_with_hs) == -1:
	        raise gr.Error("could not generate 3D coordinates for the ligand")
	    AllChem.MMFFOptimizeMolecule(mol_with_hs)

	    # Close the writer explicitly so the file is complete before obabel reads it.
	    writer = Chem.SDWriter(sdf_path)
	    try:
	        writer.write(mol_with_hs)
	    finally:
	        writer.close()


	def _convert_with_obabel(arguments, output_path):
	    """Run ``obabel <arguments> -O <output_path>`` and verify the output exists."""
	    # Remove the result of an earlier request so a failed conversion cannot
	    # silently be replaced by stale data.
	    try:
	        os.remove(output_path)
	    except FileNotFoundError:
	        pass
	    # Argument list instead of a shell string: the uploaded file name is
	    # never interpreted by a shell (spaces, quotes, ';' are passed literally).
	    subprocess.run(["obabel", *arguments, "-O", output_path])
	    if not os.path.isfile(output_path) or os.path.getsize(output_path) == 0:
	        raise gr.Error(f"obabel did not produce {os.path.basename(output_path)}")


	def _find_pocket_center(protein_pdb_path, pocket_pdb_path):
	    """Return the mean coordinate of the largest DBSCAN cluster of LigSite pocket cells."""
	    pdb = md.load(protein_pdb_path)
	    # run ligsite
	    pockets_xyz = geometry.pockets.get_pocket_cells(struct=pdb)
	    if len(pockets_xyz) == 0:
	        raise gr.Error("no binding pocket could be detected in the protein")

	    labels = DBSCAN(eps=0.15, min_samples=5).fit_predict(pockets_xyz)
	    # Cluster labels and their sizes; label -1 marks noise points.
	    unique_labels, counts = np.unique(labels, return_counts=True)
	    is_cluster = unique_labels != -1
	    if not is_cluster.any():
	        # Every pocket cell was classified as noise; argmax would fail below.
	        raise gr.Error("no binding pocket could be detected in the protein")
	    densest_cluster_label = unique_labels[is_cluster][np.argmax(counts[is_cluster])]
	    densest_cluster_points = pockets_xyz[labels == densest_cluster_label]

	    # Write the cluster to a PDB file, one pseudo-atom per pocket cell.
	    n_points = densest_cluster_points.shape[0]
	    top_df = pd.DataFrame()
	    top_df['serial'] = list(range(n_points))
	    top_df['name'] = 'PK'
	    top_df['element'] = 'H'
	    top_df['resSeq'] = list(range(n_points))
	    top_df['resName'] = 'PCK'
	    top_df['chainID'] = 0
	    pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
	    pocket_trj = md.Trajectory(xyz=densest_cluster_points, topology=pocket_top)
	    pocket_trj.save(pocket_pdb_path)

	    # NOTE(review): the center is computed from the coordinates read back
	    # from the PDB file, not from densest_cluster_points directly. This
	    # round trip presumably also converts mdtraj's units to the ones written
	    # in PDB files -- confirm before replacing it with a direct mean.
	    parser = PDBParser()
	    struc = parser.get_structure("X", pocket_pdb_path)
	    coords = [atom.coord for atom in struc.get_atoms()]
	    return np.mean(coords, axis=0)


	def predict(input_sequence, input_ligand, input_protein, exhaustiveness):
	    """
	    Main prediction function that calls ligsite and smina

	    Intermediate and result files are written to fixed paths below
	    ``/usr/src/app``.

	    Parameters
	    ----------
	    input_sequence: str
	        monomer sequence (not used by this function)
	    input_ligand: str
	        ligand as SMILES string
	    input_protein: gradio.File
	        Gradio file object to monomer protein structure as PDB
	    exhaustiveness: int
	        SMINA parameter

	    Returns
	    -------
	    output_structures: list
	        [path of the input protein, path of the docked ligand SDF]
	    run_time: float
	        run time of the program in seconds

	    Raises
	    ------
	    gr.Error
	        If the protein file or ligand is missing, the SMILES cannot be
	        parsed or embedded, a format conversion produces no output, or
	        no pocket cluster is found.
	    """
	    start_time = time.time()

	    if input_protein is None:
	        raise gr.Error("need pdb input")

	    ligand_sdf = "/usr/src/app/ligand.sdf"
	    ligand_pdbqt = "/usr/src/app/ligand.pdbqt"
	    receptor_pdbqt = "/usr/src/app/receptor.pdbqt"
	    pocket_pdb = "/usr/src/app/pockets_dense.pdb"
	    docking_pose = "/usr/src/app/docking_pose.sdf"

	    _write_ligand_sdf(input_ligand, ligand_sdf)

	    _convert_with_obabel([input_protein.name, "-xr"], receptor_pdbqt)
	    _convert_with_obabel(["-isdf", ligand_sdf], ligand_pdbqt)

	    # Find pocket
	    pocket_center = _find_pocket_center(input_protein.name, pocket_pdb)

	    # run smina
	    run_smina(
	        ligand_pdbqt,
	        receptor_pdbqt,
	        docking_pose,
	        pocket_center,
	        [10, 10, 10],
	        # The slider may deliver a float; smina expects a whole number.
	        exhaustiveness=int(exhaustiveness),
	    )
	    run_time = time.time() - start_time
	    return [input_protein.name, docking_pose], run_time

	# Gradio UI: three inputs, one docking option, a 3D viewer and a runtime box.
	with gr.Blocks() as app:

	    gr.Markdown("# LigSite + Smina")

	    gr.Markdown("Example model using LigSite and DBScan to find a binding pocket in the protein and then SMINA to dock the ligand in the found pocket.")
	    with gr.Row():
	        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
	        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
	        input_protein = gr.File(label="Input protein monomer")

	    # define any options here

	    # for automated inference the default options are used
	    # step=1 keeps the value a whole number: without an explicit step the
	    # slider may offer fractional positions, and the value is forwarded to
	    # smina's --exhaustiveness option.
	    exhaustiveness = gr.Slider(1, 10, value=1, step=1, label="Slider Option")
	    # checkbox_option = gr.Checkbox(label="Checkbox Option")
	    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

	    btn = gr.Button("Run Inference")

	    # One pre-filled example: sequence, ligand SMILES and a protein file.
	    gr.Examples(
	        [
	            [
	                "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
	                "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
	                "input_test.pdb",
	            ],
	        ],
	        [input_sequence, input_ligand, input_protein],
	    )

	    # Viewer representations. predict() returns [protein file, docked pose],
	    # so model 0 is presumably the protein and model 1 the docked ligand
	    # -- verify against the Molecule3D component.
	    reps = [
	        {
	            "model": 0,
	            "style": "cartoon",
	            "color": "whiteCarbon",
	        },
	        {
	            "model": 0,
	            "resname": "UNK",
	            "style": "stick",
	            "color": "greenCarbon",
	        },
	        {
	            "model": 0,
	            "resname": "LIG",
	            "style": "stick",
	            "color": "greenCarbon",
	        },
	        {
	            "model": 1,
	            "style": "stick",
	            "color": "greenCarbon",
	        },
	    ]

	    out = Molecule3D(reps=reps)
	    run_time = gr.Textbox(label="Runtime")

	    btn.click(predict, inputs=[input_sequence, input_ligand, input_protein, exhaustiveness], outputs=[out, run_time])

	app.launch()