# Source code for omicsdata.tree.neutree

###############################################################################################################
# __init__.py
# 
# Contains the source code for reading/writing 'Neutree' namedtuples 
###############################################################################################################
import pickle, sys, os
from collections import namedtuple
import numpy as np

# Put the parent of this file's directory on the module search path. This must happen
# before the import below; presumably that directory holds the `columns` module (TODO confirm).
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
from columns import NEUTREE_Columns

# for compatibility with Pairtree, we reuse the Neutree named tuple
# The field names are taken from the NEUTREE_Columns constants, in the order listed here.
Neutree = namedtuple('Neutree', (NEUTREE_Columns.STRUCTS, NEUTREE_Columns.PHIS, NEUTREE_Columns.COUNTS, NEUTREE_Columns.LOGSCORES, NEUTREE_Columns.CLUSTERINGS, NEUTREE_Columns.GARBAGE))

def save(ntree, neutree_fn):
	"""Saves the data for a bulk DNA cancer phylogeny reconstruction in a generalized format
	that's simply a pickled namedtuple written to a file

	The structs, phis, counts, logscores and clusterings fields must all have the
	same length as ``ntree.structs``. Before writing, the counts and logscores fields
	are converted to numpy arrays; this is done on a copy made with ``_replace``, so
	the namedtuple passed in by the caller is left untouched.

	Parameters
	----------
	ntree : namedtuple
		the named tuple that will be pickled
	neutree_fn : str
		the file name that the ntree namedtuple will be written to

	Returns
	-------
	None

	Raises
	------
	AssertionError
		if one of the checked fields does not have the same length as ``ntree.structs``
	"""
	checked_fields = (
		NEUTREE_Columns.STRUCTS,
		NEUTREE_Columns.PHIS,
		NEUTREE_Columns.COUNTS,
		NEUTREE_Columns.LOGSCORES,
		NEUTREE_Columns.CLUSTERINGS,
	)
	N = len(ntree.structs)
	for K in checked_fields:
		n_entries = len(getattr(ntree, K))
		if n_entries != N:
			# Raised explicitly instead of using an `assert` statement so the check is
			# not stripped when Python runs with -O; the exception type is unchanged.
			raise AssertionError('%s has length %s instead of %s' % (K, n_entries, N))

	# we always expect the counts and logscores in the Neutree archive to be ndarray's
	arr_vals = {K: np.array(getattr(ntree, K)) for K in (NEUTREE_Columns.COUNTS, NEUTREE_Columns.LOGSCORES)}
	ntree = ntree._replace(**arr_vals)

	with open(neutree_fn, 'wb') as F:
		pickle.dump(ntree, F)

def load(neutree_fn):
	"""Loads a pickled object (normally a Neutree namedtuple) from a file

	Parameters
	----------
	neutree_fn : str
		the file name that the ntree namedtuple will be loaded from

	Returns
	-------
	object
		whatever object was pickled into the file; the file is opened in binary mode
		and its contents are returned exactly as ``pickle.load`` reconstructs them
	"""
	# SECURITY: unpickling can execute arbitrary code embedded in the file, so this
	# must only be used on files that come from a trusted source.
	with open(neutree_fn, 'rb') as F:
		return pickle.load(F)