Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

""" 

Functions for tree calculations and working with trees. 

""" 

import xml.dom.minidom as minidom 

from collections import deque 

 

from lingpy.thirdparty import cogent as cg 

from lingpy import util 

 

 

def _nwk_format(taxon):
    """
    Sanitize a taxon name so that it can be used inside a newick string.

    Surrounding whitespace is stripped, every remaining blank is replaced
    by an underscore, and all brackets, quotes and punctuation marks that
    are listed below are dropped from the name.
    """
    # characters that must not occur in the cleaned taxon name
    forbidden = '!?()[]{},;:."' + "'"

    # trim the name first, then turn inner blanks into underscores
    cleaned = taxon.strip().replace(' ', '_')

    return ''.join(char for char in cleaned if char not in forbidden)

 

def nwk2tree_matrix(newick):
    """
    Convert a newick file to a tree matrix.

    Parameters
    ----------
    newick : str or tree object
        Either a newick string, which is parsed with ``cg.LoadTree``, or an
        already constructed tree object. A tree object is recognised by
        having a ``root`` attribute and must offer ``taxa``,
        ``getConnectingEdges``, ``getNodeNames`` and
        ``getNodeMatchingName``.

    Returns
    -------
    matrix : list
        One row ``[idxA, idxB, dst, obs]`` per non-taxon node, where
        ``idxA`` and ``idxB`` are the ids of the node's first two children,
        ``obs`` is the number of tips below the node and ``dst`` is that
        same number as a float.
    taxa : list
        The taxa of the tree, ordered by decreasing number of connecting
        edges to the root. The position of a taxon in this list is its id
        in the matrix.

    Raises
    ------
    TypeError
        If `newick` is neither a string nor a tree-like object.

    Notes
    -----
    This is an additional function that can be used for plots with help of
    matplotlibs functions. The tree_matrix is compatible with those matrices
    that scipy's linkage functions create.

    Only the first two children of every node are recorded, so further
    children of a multifurcating node are ignored.
    """
    if isinstance(newick, str):
        tree = cg.LoadTree(treestring=newick)
    elif hasattr(newick, 'root'):
        tree = newick
    else:
        # fail early and clearly instead of hitting an unbound ``tree`` below
        raise TypeError(
            "newick must be a newick string or a tree object, got "
            + type(newick).__name__
        )

    # taxa that are farthest away from the root come first
    taxa = sorted(
        tree.taxa,
        key=lambda x: len(tree.getConnectingEdges('root', x)),
        reverse=True
    )

    # taxa receive the ids 0 .. len(taxa) - 1
    tax2id = dict(zip(taxa, range(len(taxa))))

    # process the remaining nodes from few tips to many tips, because the
    # ids of a node's children are looked up when its row is created
    nodes = sorted(
        [t for t in tree.getNodeNames() if t not in tax2id],
        key=lambda x: len(tree.getNodeMatchingName(x).tips()),
    )

    matrix = []

    # ids of the non-taxon nodes continue right after the taxon ids
    for idx, node in enumerate(nodes, start=len(taxa)):
        n = tree.getNodeMatchingName(node)
        names = [c.Name for c in n.Children]
        idxA = tax2id[names[0]]
        idxB = tax2id[names[1]]
        tax2id[node] = idx

        # the number of tips doubles as the (float) distance value
        obs = len(n.tips())
        dst = obs * 1.0
        matrix.append([idxA, idxB, dst, obs])

    return matrix, taxa