mirror of
https://github.com/google-deepmind/deepmind-research.git
synced 2025-12-11 00:46:20 +08:00
67 lines
2.5 KiB
Python
67 lines
2.5 KiB
Python
# Copyright 2019 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Parsers for various standard biology or AlphaFold-specific formats."""
|
|
|
|
import pickle
|
|
|
|
import tensorflow as tf # pylint: disable=g-explicit-tensorflow-version-import
|
|
|
|
|
|
def distance_histogram_dict(f):
  """Parses and validates a distance histogram dict pickle.

  Distance histograms are stored as pickles of dicts.

  Write one of these with contacts/write_rr_file.write_pickle_file()

  NOTE: unpickling can execute arbitrary code. Only pass handles to files that
  come from a trusted source.

  Args:
    f: File-like handle to distance histogram dict pickle.

  Returns:
    The unpickled dict, unchanged, with (at least) the fields:
      sequence: sized object whose length L must equal the first two dims of
        probs.
      probs: (an L x L x num_bins) histogram.
      num_bins: number of bins for each residue pair
      min_range: left hand edge of the distance histogram
      max_range: the extent of the histogram NOT the right hand edge.

  Raises:
    ValueError: if a required key is missing, if probs is not rank 3, if the
      third dim of probs differs from num_bins, or if the first two dims of
      probs differ from len(sequence).
  """
  # SECURITY: pickle.load will run whatever the pickle tells it to; this is only
  # safe on trusted input. encoding='latin1' is the setting the pickle docs
  # prescribe for reading NumPy arrays pickled by Python 2.
  contact_dict = pickle.load(f, encoding='latin1')

  # 'sequence' is validated together with the other fields so that a pickle
  # lacking it fails with the same descriptive ValueError instead of a bare
  # KeyError raised before any validation has happened.
  required_keys = ('sequence', 'probs', 'num_bins', 'min_range', 'max_range')
  if not all(key in contact_dict for key in required_keys):
    raise ValueError('The pickled contact dict doesn\'t contain all required '
                     'keys: sequence, probs, num_bins, min_range, max_range '
                     'but %s.' % list(contact_dict.keys()))

  probs = contact_dict['probs']
  if probs.ndim != 3:
    raise ValueError('Probs is not rank 3 but %d' % probs.ndim)

  num_bins = contact_dict['num_bins']
  if num_bins != probs.shape[2]:
    raise ValueError(
        'The probs shape doesn\'t match num_bins in the third dimension. '
        'Expected %d got %d.' % (num_bins, probs.shape[2]))

  # Only read the sequence length once its presence has been checked.
  num_res = len(contact_dict['sequence'])
  if probs.shape[:2] != (num_res, num_res):
    raise ValueError(
        'The first two probs dims (%i, %i) aren\'t equal to len(sequence) %i'
        % (probs.shape[0], probs.shape[1], num_res))

  return contact_dict
|
|
|
|
|
|
def parse_distance_histogram_dict(filepath):
  """Loads a distance histogram pickle from a path.

  Opens `filepath` for binary reading through tf.io.gfile.GFile and hands the
  open handle to distance_histogram_dict.

  Args:
    filepath: Path of the distance histogram dict pickle to read.

  Returns:
    Whatever distance_histogram_dict returns for the opened file.
  """
  with tf.io.gfile.GFile(filepath, 'rb') as handle:
    histogram = distance_histogram_dict(handle)
  return histogram
|