Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

from __future__ import division, unicode_literals 

import io 

import unicodedata 

import logging 

from tempfile import NamedTemporaryFile 

from functools import partial 

import itertools 

import types 

from tqdm import tqdm 

 

from six import text_type 

from clldutils.path import Path, remove, path_component 

from clldutils import clilib 

from clldutils.misc import slug 

 

import lingpy 

from lingpy.log import get_level, file_written 

 

# Label made of the literal 'LingPy-' followed by lingpy.__version__.
PROG = 'LingPy-{0}'.format(lingpy.__version__)

# tqdm with the keyword argument ``leave=False`` pre-bound.
pb = partial(tqdm, leave=False)

 

 

def charstring(id_, char='X', cls='-'):
    """
    Build a dot-separated string from an identifier, a character and a class.

    Parameters
    ----------
    id_ : object
        First component of the result.
    char : object (default='X')
        Second component of the result.
    cls : object (default='-')
        Third component of the result.

    Returns
    -------
    str
        The three components formatted as ``id_.char.cls``.
    """
    template = '{0}.{1}.{2}'
    return template.format(id_, char, cls)

 

 

def combinations2(iterable):
    """
    Convenience shortcut for ``itertools.combinations`` with a length of 2.

    Parameters
    ----------
    iterable : iterable
        The items to draw pairs from.

    Returns
    -------
    iterator
        The ``itertools.combinations`` iterator over 2-tuples.
    """
    pair_size = 2
    return itertools.combinations(iterable, pair_size)

 

 

def multicombinations2(iterable):
    """
    Convenience shortcut for ``itertools.combinations_with_replacement``
    with a length of 2.

    For the name, see the Wikipedia article on Combination:
    https://en.wikipedia.org/wiki/Combination#Number_of_combinations_with_repetition

    Parameters
    ----------
    iterable : iterable
        The items to draw pairs from.

    Returns
    -------
    iterator
        The ``itertools.combinations_with_replacement`` iterator over 2-tuples.
    """
    pair_size = 2
    return itertools.combinations_with_replacement(iterable, pair_size)

 

 

# itertools.product with ``repeat=2`` pre-bound.
product2 = partial(itertools.product, repeat=2)

 

 

def join(sep, *args, **kw):
    """
    Convenience shortcut. Strings to be joined do not have to be passed as list or tuple.

    Parameters
    ----------
    sep : str
        The separator placed between the joined items.
    *args
        The items to join. If exactly one positional argument is passed and it
        is a list, a tuple or a generator, its elements are joined instead.
    condition : callable, optional
        Keyword argument; only items for which ``condition(item)`` is truthy
        are included. By default every item is included.

    Returns
    -------
    str
        The string representations of the selected items, joined by `sep`.

    Notes
    -----
    An implicit conversion of objects to strings is performed as well.

    """
    if len(args) == 1 and isinstance(args[0], (list, tuple, types.GeneratorType)):
        args = args[0]
    condition = kw.get('condition', lambda x: True)
    # Format a 1-tuple rather than the bare item: with ``'%s' % arg`` a tuple
    # item would be taken as the list of format arguments (TypeError for
    # several elements, silent unwrapping for a single one).
    return sep.join(['%s' % (arg,) for arg in args if condition(arg)])

 

 

# `join` with the separator fixed to '.'.
dotjoin = partial(join, '.')

# `join` with the separator fixed to a tab character.
tabjoin = partial(join, '\t')

# `clilib.confirm` with ``default=False`` pre-bound.
confirm = partial(clilib.confirm, default=False)

 

 

class TemporaryPath(object):
    """
    Context manager providing a temporary file-system path.

    The name is taken from a ``NamedTemporaryFile`` that is closed again right
    away in the constructor. Entering the context yields the path as a posix
    string; on exit, whatever exists at that path is removed.

    Parameters
    ----------
    suffix : str (default='')
        Suffix passed on to ``NamedTemporaryFile``.
    """

    def __init__(self, suffix=''):
        # Only the generated name is needed; the handle is closed as soon as
        # the ``with`` block is left.
        with NamedTemporaryFile(suffix=suffix) as handle:
            self.name = Path(handle.name)

    def __enter__(self):
        return self.name.as_posix()

    def __exit__(self, exc_type, exc_val, exc_tb):
        target = self.name
        if not target.exists():
            return
        remove(target)

 

 

def lingpy_path(*comps):
    """
    Build a path below the directory containing the lingpy package.

    Parameters
    ----------
    *comps : str
        Path components appended to the parent directory of ``lingpy.__file__``.

    Returns
    -------
    str
        The resulting path in posix notation.
    """
    package_dir = Path(lingpy.__file__).parent
    return package_dir.joinpath(*comps).as_posix()

 

 

# `lingpy_path` with 'data' pre-bound as the first path component.
data_path = partial(lingpy_path, 'data')

 

 

def _str_path(path, mkdir=False):
    """Get a file-system path as text_type, suitable for passing into io.open.

    Parameters
    ----------
    path : {text_type, Path}
        A fs path either as Path instance or as text_type.
    mkdir : bool (default=False)
        If True, create the parent directory of the path (including missing
        intermediate directories) when it does not exist yet.

    Returns
    -------
    path : text_type
        The path as text_type, in posix notation.
    """
    res = Path(path_component(path))
    if mkdir:
        parent = res.parent
        if parent and not parent.exists():
            parent.mkdir(parents=True)
    return res.as_posix()

 

 

def write_text_file(path, content, normalize=None, log=True):
    """Write a text file encoded in utf-8.

    Parameters
    ----------
    path : { Path, str }
        File-system path of the file. Missing parent directories are created.
    content : { str, iterable of str }
        The text content to be written. Anything that is not a text string is
        passed through `lines_to_text` first.
    normalize : { None, "NFC", "NFD" } (default=None)
        If not `None` a valid unicode normalization mode must be passed.
    log : bool (default=True)
        Indicate whether you want to log the result of the file writing
        process.

    """
    if isinstance(content, text_type):
        text = content
    else:
        text = lines_to_text(content)

    target = _str_path(path, mkdir=True)
    with io.open(target, 'w', encoding='utf8') as fp:
        # Normalization happens only once the file is open, so an invalid
        # mode surfaces at the same point as before.
        if normalize:
            text = unicodedata.normalize(normalize, text)
        fp.write(text)

    if log:
        file_written(target)

 

 

def lines_to_text(lines):
    """
    Concatenate lines into a single string, newline-terminating each line.

    Parameters
    ----------
    lines : iterable of str
        The lines; a line already ending in a newline is used as is.

    Returns
    -------
    str
        The concatenation of all lines, each ending with a newline character.
    """
    terminated = []
    for line in lines:
        if not line.endswith('\n'):
            line = line + '\n'
        terminated.append(line)
    return ''.join(terminated)

 

 

class TextFile(object):
    """
    Context manager around a utf-8 encoded text file opened for writing.

    The file is opened in the constructor (creating missing parent
    directories); entering the context yields the open file object. On exit
    the file is closed and, if `log` is set, the write is reported through
    `file_written`.

    Parameters
    ----------
    path : { Path, str }
        File-system path of the file.
    log : bool (default=True)
        Whether to report the written file on exit.
    """

    def __init__(self, path, log=True):
        self.path, self.log = path, log
        target = _str_path(path, mkdir=True)
        self.fp = io.open(target, 'w', encoding='utf8')

    def __enter__(self):
        return self.fp

    def __exit__(self, type, value, traceback):
        self.fp.close()
        if not self.log:
            return
        file_written(_str_path(self.path))

 

 

def read_text_file(path, normalize=None, lines=False):
    """
    Read a text file encoded in utf-8.

    Parameters
    ----------
    path : { Path, str }
        File-system path of the file.
    normalize : { None, "NFC", "NFD" }
        If not `None` a valid unicode normalization mode must be passed.
    lines : bool (default=False)
        Flag signalling whether to return a list of lines (without
        the line-separation character).

    Returns
    -------
    file_content : { list, str }
        File content as unicode object or list of lines as unicode objects.

    Notes
    -----
    The whole file is read into memory. The file is opened with the
    ``utf-8-sig`` codec.

    """
    # Pick the per-chunk transformation once, up front.
    if normalize:
        def _normalize(chunk):
            return unicodedata.normalize(normalize, chunk)
    else:
        def _normalize(chunk):
            return chunk

    with io.open(_str_path(path), 'r', encoding='utf-8-sig') as fp:
        if not lines:
            return _normalize(fp.read())
        return [_normalize(line.strip('\r\n')) for line in fp]

 

 

def as_string(obj, pprint=False):
    """
    Convert an object to its text representation, optionally printing it.

    Parameters
    ----------
    obj : object
        The object to convert with ``text_type``.
    pprint : bool (default=False)
        If True, the text is also printed, provided the level returned by
        ``get_level()`` is not above ``logging.ERROR``.

    Returns
    -------
    text_type
        The text representation of `obj`.
    """
    text = text_type(obj)
    if get_level() <= logging.ERROR:
        if pprint:
            print(text)
    return text

 

 

def read_config_file(path, **kw):
    """Read lines of a file ignoring commented lines and empty lines.

    Parameters
    ----------
    path : { Path, str }
        File-system path of the file.
    **kw
        Passed on to `read_text_file`; ``lines`` is always forced to True.

    Returns
    -------
    list
        The whitespace-stripped lines that are neither empty nor start
        with ``#``.
    """
    kw['lines'] = True
    entries = []
    for raw in read_text_file(path, **kw):
        stripped = raw.strip()
        if not stripped or stripped.startswith('#'):
            continue
        entries.append(stripped)
    return entries

 

 

def setdefaults(d, **kw):
    """Shortcut for a common idiom, setting multiple default values at once.

    Parameters
    ----------
    d : dict
        Dictionary to be updated in place; keys already present keep
        their value.
    kw : dict
        Dictionary with default values.
    """
    for item in kw.items():
        # item is a (key, default) pair.
        d.setdefault(*item)

 

 

def identity(x):
    """Return the argument unchanged."""
    return x

 

 

def nexus_slug(s):
    """
    Converts a string to a nexus "safe" representation (i.e. removes
    many unicode characters and removes some punctuation characters).

    Parameters
    ----------
    s : str
        A string to convert to a nexus safe format.

    Returns
    -------
    s : str
        A string containing a nexus safe label.
    """
    # Case and whitespace are kept by the slug call; spaces are turned into
    # underscores afterwards.
    slugged = slug(s, lowercase=False, remove_whitespace=False)
    return slugged.replace(" ", "_")