Workflow 4 - analyze the results
Cluster interactive plots
Movie trajectory plots
[1]:
#required imports
from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import interactive, fixed, IntSlider, VBox, HBox, Layout
import ipywidgets as widgets
import matplotlib.patheffects as PathEffects
from IPython.display import display,clear_output
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from engens.core.EnGens import EnGen
from engens.core.ClustEn import *
from engens.core.FeatureSelector import *
from engens.core.DimReduction import *
from engens.core.PlotUtils import *
import pickle as pk
import os
Warning: importing 'simtk.openmm' is deprecated. Import 'openmm' instead.
[2]:
plt.ioff()
[3]:
engen = None
with open("wf3_resulting_EnGen.pickle", "rb") as file:
engen = pk.load(file)
clust = None
with open("wf3_resulting_Clust.pickle", "rb") as file:
clust = pk.load(file)
[4]:
%matplotlib inline
Cluster plots
[5]:
p_utils1 = PlotUtils(engen, clust, stride=20)
# if your trajectory is very large plotting this dashboard migth take some time
# because one point for every frame has to be plotted
# if you want a faster plot - provide the stride - run:
# p_utils1 = PlotUtils(engen, clust, stride=20)
# this will load every 20th frame from the trajectory
p_utils1.dashboard1(path = "./res_ensemble")
View multiple structures per cluster
[6]:
cluster_index = 0
n_frames = 5
[7]:
p_utils1.plot_frames_from_cluster(cluster_index, n_frames,
plot_representative=True)
Cluster 0 contains 2820 frames/structures
Uniformly sampled 5 frames from cluster 0:
[ 1 1125 2316 3452 5000]
Adding frame 1
Adding frame 1125
Adding frame 2316
Adding frame 3452
Adding frame 5000
Adding the representative frame 983
[8]:
cluster_index = 2
n_frames = 5
[9]:
# You can ask for the sampled frames to be extracted into a folder of your choice
p_utils1.plot_frames_from_cluster(cluster_index, n_frames,
plot_representative=True,
extract_selected=True,
folder_path="./cluster3_samples")
Cluster 2 contains 795 frames/structures
Uniformly sampled 5 frames from cluster 2:
[ 12 1574 3690 4481 4946]
Adding frame 12
Adding frame 1574
Adding frame 3690
Adding frame 4481
Adding frame 4946
Adding the representative frame 4030
Extracting ./cluster3_samples/cluster_2_frame_12.pdb
Extracting ./cluster3_samples/cluster_2_frame_1574.pdb
Extracting ./cluster3_samples/cluster_2_frame_3690.pdb
Extracting ./cluster3_samples/cluster_2_frame_4481.pdb
Extracting ./cluster3_samples/cluster_2_frame_4946.pdb
Extracting ./cluster3_samples/cluster_2_representative_frame_4030.pdb
Trajectory movie
[10]:
p_utils1.view_movie("cartoon")
[11]:
p_utils1.view_movie("ball_and_stick")
Additional info per cluster - distance between two residues
[12]:
residue1_index = 0
residue2_index = 3
[13]:
from pyemma.coordinates import source
pyemma_src = source(engen.traj, top=engen.ref, chunksize=100)
pyemma_src.featurizer.add_residue_mindist([[residue1_index,residue2_index]])
data = p_utils1.load_as_memmap(pyemma_src)
100%|██████████| 51/51 [00:00<00:00, 479.00it/s]
[14]:
scatter_fig = p_utils1.plot_custom_feature_scatter(data[:, 0].flatten(),
y_title="residue mindist {}-{}".format(residue1_index, residue2_index))
[array([ 1, 4, 5, ..., 4998, 4999, 5000]), array([ 0, 2, 3, ..., 4993, 4994, 4995]), array([ 12, 76, 77, ..., 4944, 4945, 4946])]
[array([0.3727922 , 0.40462458, 0.40593845, ..., 0.32041377, 0.34461126,
0.32366803], dtype=float32), array([0.69963634, 0.52747977, 0.31400812, ..., 0.34616482, 0.36649022,
0.5603731 ], dtype=float32), array([0.69967127, 0.6979241 , 0.6454751 , ..., 0.6746199 , 0.62525284,
0.6606203 ], dtype=float32)]
[15]:
box_fig = p_utils1.plot_custom_feature_box(data[:, 0].flatten(),
y_title="residue mindist {}-{}".format(residue1_index, residue2_index))
Additional info per cluster - RMSD to a frame
[16]:
ref_frame = 3294
[17]:
from pyemma.coordinates import source
pyemma_src = source(engen.traj, top=engen.ref, chunksize=100)
pyemma_src.featurizer.add_minrmsd_to_ref(engen.traj, ref_frame=ref_frame)
data = p_utils1.load_as_memmap(pyemma_src)
100%|██████████| 51/51 [00:00<00:00, 547.07it/s]
[18]:
scatter_fig = p_utils1.plot_custom_feature_scatter(data[:, 0].flatten(),
y_title="RMSD to frame {}".format(ref_frame))
[array([ 1, 4, 5, ..., 4998, 4999, 5000]), array([ 0, 2, 3, ..., 4993, 4994, 4995]), array([ 12, 76, 77, ..., 4944, 4945, 4946])]
[array([0.43822834, 0.45047235, 0.55501133, ..., 0.46977922, 0.45583323,
0.46433878], dtype=float32), array([0.48848018, 0.42792878, 0.39013505, ..., 0.39538947, 0.4615347 ,
0.4281541 ], dtype=float32), array([0.48617372, 0.479701 , 0.47244528, ..., 0.43468508, 0.47706974,
0.45323363], dtype=float32)]
[19]:
scatter_fig = p_utils1.plot_custom_feature_box(data[:, 0].flatten(),
y_title="RMSD to frame {}".format(ref_frame))
Additional info per cluster - RMSD to another structure (given a PDB file)
[20]:
#ref_file = "./pentapeptide-00-500ns-impl-solv-aligned-engen-selected.pdb"
[21]:
from pyemma.coordinates import source
#pyemma_src = source(engen.traj, top=engen.ref, chunksize=100)
#pyemma_src.featurizer.add_minrmsd_to_ref(mdtraj.load(ref_file))
#data = p_utils1.load_as_memmap(pyemma_src)
[22]:
#scatter_fig = p_utils1.plot_custom_feature_scatter(data[:, 0].flatten(),
#y_title="RMSD to structure")
[23]:
#scatter_fig = p_utils1.plot_custom_feature_box(data[:, 0].flatten(),
#y_title="RMSD to frame {}".format(ref_frame))
Additional info per cluster - component from dimensionality reduction
[24]:
component_index = 2
[25]:
data = engen.dimred_data[:, component_index]
[26]:
scatter_fig = p_utils1.plot_custom_feature_scatter(data,
y_title="Value of component {}".format(component_index))
[array([ 1, 4, 5, ..., 4998, 4999, 5000]), array([ 0, 2, 3, ..., 4993, 4994, 4995]), array([ 12, 76, 77, ..., 4944, 4945, 4946])]
[array([-0.2035274 , 0.05434679, 0.5587249 , ..., 0.22342978,
0.8517926 , -0.3846619 ], dtype=float32), array([ 0.51692855, 0.2768213 , -0.29748067, ..., -0.19363607,
0.26827607, 0.18262684], dtype=float32), array([-0.5186528 , -0.00988184, -0.2456175 , ..., 0.06303956,
-0.67660075, 0.38237935], dtype=float32)]
[27]:
scatter_fig = p_utils1.plot_custom_feature_box(data,
y_title="Value of component {}".format(component_index))
[ ]: