Workflow 4 - analyze the results


  1. Cluster interactive plots

  2. Movie trajectory plots

[1]:
#required imports
from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import interactive, fixed, IntSlider, VBox, HBox, Layout
import ipywidgets as widgets
import matplotlib.patheffects as PathEffects
from IPython.display import display,clear_output
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from engens.core.EnGens import EnGen
from engens.core.ClustEn import *
from engens.core.FeatureSelector import *
from engens.core.DimReduction import *
from engens.core.PlotUtils import *
import pickle as pk
import os

Warning: importing 'simtk.openmm' is deprecated.  Import 'openmm' instead.
[2]:
plt.ioff()
[3]:
engen = None
with open("wf3_resulting_EnGen.pickle", "rb") as file:
    engen = pk.load(file)
clust = None
with open("wf3_resulting_Clust.pickle", "rb") as file:
    clust = pk.load(file)
[4]:
%matplotlib inline

Cluster plots


[5]:
p_utils1 = PlotUtils(engen, clust, stride=20)
# if your trajectory is very large plotting this dashboard migth take some time
# because one point for every frame has to be plotted
# if you want a faster plot - provide the stride - run:
# p_utils1 = PlotUtils(engen, clust, stride=20)
# this will load every 20th frame from the trajectory
p_utils1.dashboard1(path = "./res_ensemble")

View multiple structures per cluster


[6]:
cluster_index = 0
n_frames = 5
[7]:
p_utils1.plot_frames_from_cluster(cluster_index, n_frames,
                                  plot_representative=True)
Cluster 0 contains 2820 frames/structures
Uniformly sampled 5 frames from cluster 0:
[   1 1125 2316 3452 5000]
Adding frame 1
Adding frame 1125
Adding frame 2316
Adding frame 3452
Adding frame 5000
Adding the representative frame 983
[8]:
cluster_index = 2
n_frames = 5
[9]:
# You can ask for the sampled frames to be extracted into a folder of your choice
p_utils1.plot_frames_from_cluster(cluster_index, n_frames,
                                  plot_representative=True,
                                 extract_selected=True,
                                folder_path="./cluster3_samples")
Cluster 2 contains 795 frames/structures
Uniformly sampled 5 frames from cluster 2:
[  12 1574 3690 4481 4946]
Adding frame 12
Adding frame 1574
Adding frame 3690
Adding frame 4481
Adding frame 4946
Adding the representative frame 4030
Extracting ./cluster3_samples/cluster_2_frame_12.pdb
Extracting ./cluster3_samples/cluster_2_frame_1574.pdb
Extracting ./cluster3_samples/cluster_2_frame_3690.pdb
Extracting ./cluster3_samples/cluster_2_frame_4481.pdb
Extracting ./cluster3_samples/cluster_2_frame_4946.pdb
Extracting ./cluster3_samples/cluster_2_representative_frame_4030.pdb

Trajectory movie


[10]:
p_utils1.view_movie("cartoon")
[11]:
p_utils1.view_movie("ball_and_stick")

Additional info per cluster - distance between two residues


[12]:
residue1_index = 0
residue2_index = 3
[13]:
from pyemma.coordinates import source

pyemma_src = source(engen.traj, top=engen.ref, chunksize=100)
pyemma_src.featurizer.add_residue_mindist([[residue1_index,residue2_index]])

data = p_utils1.load_as_memmap(pyemma_src)
100%|██████████| 51/51 [00:00<00:00, 479.00it/s]
[14]:
scatter_fig = p_utils1.plot_custom_feature_scatter(data[:, 0].flatten(),
                                                   y_title="residue mindist {}-{}".format(residue1_index, residue2_index))
[array([   1,    4,    5, ..., 4998, 4999, 5000]), array([   0,    2,    3, ..., 4993, 4994, 4995]), array([  12,   76,   77, ..., 4944, 4945, 4946])]
[array([0.3727922 , 0.40462458, 0.40593845, ..., 0.32041377, 0.34461126,
       0.32366803], dtype=float32), array([0.69963634, 0.52747977, 0.31400812, ..., 0.34616482, 0.36649022,
       0.5603731 ], dtype=float32), array([0.69967127, 0.6979241 , 0.6454751 , ..., 0.6746199 , 0.62525284,
       0.6606203 ], dtype=float32)]
[15]:
box_fig = p_utils1.plot_custom_feature_box(data[:, 0].flatten(),
                                                   y_title="residue mindist {}-{}".format(residue1_index, residue2_index))

Additional info per cluster - RMSD to a frame


[16]:
ref_frame = 3294
[17]:
from pyemma.coordinates import source

pyemma_src = source(engen.traj, top=engen.ref, chunksize=100)
pyemma_src.featurizer.add_minrmsd_to_ref(engen.traj, ref_frame=ref_frame)

data = p_utils1.load_as_memmap(pyemma_src)
100%|██████████| 51/51 [00:00<00:00, 547.07it/s]
[18]:
scatter_fig = p_utils1.plot_custom_feature_scatter(data[:, 0].flatten(),
                                                   y_title="RMSD to frame {}".format(ref_frame))
[array([   1,    4,    5, ..., 4998, 4999, 5000]), array([   0,    2,    3, ..., 4993, 4994, 4995]), array([  12,   76,   77, ..., 4944, 4945, 4946])]
[array([0.43822834, 0.45047235, 0.55501133, ..., 0.46977922, 0.45583323,
       0.46433878], dtype=float32), array([0.48848018, 0.42792878, 0.39013505, ..., 0.39538947, 0.4615347 ,
       0.4281541 ], dtype=float32), array([0.48617372, 0.479701  , 0.47244528, ..., 0.43468508, 0.47706974,
       0.45323363], dtype=float32)]
[19]:
scatter_fig = p_utils1.plot_custom_feature_box(data[:, 0].flatten(),
                                                   y_title="RMSD to frame {}".format(ref_frame))

Additional info per cluster - RMSD to another structure (given a PDB file)


[20]:
#ref_file = "./pentapeptide-00-500ns-impl-solv-aligned-engen-selected.pdb"
[21]:
from pyemma.coordinates import source

#pyemma_src = source(engen.traj, top=engen.ref, chunksize=100)
#pyemma_src.featurizer.add_minrmsd_to_ref(mdtraj.load(ref_file))

#data = p_utils1.load_as_memmap(pyemma_src)
[22]:
#scatter_fig = p_utils1.plot_custom_feature_scatter(data[:, 0].flatten(),
                                                   #y_title="RMSD to structure")
[23]:
#scatter_fig = p_utils1.plot_custom_feature_box(data[:, 0].flatten(),
                                                   #y_title="RMSD to frame {}".format(ref_frame))

Additional info per cluster - component from dimensionality reduction


[24]:
component_index = 2
[25]:
data = engen.dimred_data[:, component_index]
[26]:
scatter_fig = p_utils1.plot_custom_feature_scatter(data,
                                                   y_title="Value of component {}".format(component_index))
[array([   1,    4,    5, ..., 4998, 4999, 5000]), array([   0,    2,    3, ..., 4993, 4994, 4995]), array([  12,   76,   77, ..., 4944, 4945, 4946])]
[array([-0.2035274 ,  0.05434679,  0.5587249 , ...,  0.22342978,
        0.8517926 , -0.3846619 ], dtype=float32), array([ 0.51692855,  0.2768213 , -0.29748067, ..., -0.19363607,
        0.26827607,  0.18262684], dtype=float32), array([-0.5186528 , -0.00988184, -0.2456175 , ...,  0.06303956,
       -0.67660075,  0.38237935], dtype=float32)]
[27]:
scatter_fig = p_utils1.plot_custom_feature_box(data,
                                                   y_title="Value of component {}".format(component_index))
[ ]: