Coverage for concept/concept.py: 82%

1from typing import Dict

3from util.constants import *

class Concept:
    """
    Stores concept information retrieved from WoRMS and the VARS kb.

    On construction the VARS concept name is parsed by ``analyze_concept_name``: qualifier tokens
    ('cf', 'sp', 'nr', 'aff', 'n', ...) are stripped out of ``concept_words`` and remembered in the
    flag attributes so that ``load_from_record`` can add them back to the reported scientific name.
    """

    def __init__(self, concept_name: str):
        """
        :param str concept_name: The VARS concept name of the organism we want to get information about.
        """
        self.concept_name = concept_name  # the given concept name from the VARS annotation record
        self.aphia_id = NULL_VAL_INT  # to fetch from WoRMS
        self.scientific_name = NULL_VAL_STRING  # to fetch from WoRMS
        self.authorship = NULL_VAL_STRING  # to fetch from WoRMS
        self.vernacular_names = NULL_VAL_STRING  # to fetch from WoRMS
        self.synonyms = []  # to fetch from VARS kb
        self.taxon_rank = NULL_VAL_STRING  # to fetch from WoRMS
        self.taxon_ranks = {}  # the phylogeny/taxon tree (kingdom, phylum, class, etc)
        self.descriptors = []  # extra words from the annotation record that aren't the scientific name
        self.concept_words = []  # for cleaning concept name
        self.concept_add_words = []  # for cleaning concept name
        self.cf_flag = []  # (cf = compare with) if record includes cf, should be manually reviewed
        self.nr_flag = []  # (nr = near) should be manually reviewed
        self.aff_flag = []  # (aff = looks similar to) should be manually reviewed
        self.sp_flag = False  # used to check whether to append 'sp.' to scientific name

        self.analyze_concept_name()

    def flatten_taxa_tree(self, tree: Dict, flat: Dict):
        """
        Recursive function taking a taxonomy tree returned from WoRMS API. Flattens the tree into ``flat``
        (one ``rank -> scientific name`` entry per level); nothing is stored on self.

        When the deepest node is reached and this concept carries a 'cf' flag, an extra entry
        ``'cf. [words]'`` is added for the rank directly below that node (only if the node's rank is one
        of Genus, Family, Order, Class or Phylum).

        :param Dict tree: The nested taxon tree from WoRMS.
        :param Dict flat: The newly created flat taxon tree (mutated in place).
        """
        flat[tree['rank']] = tree['scientificname']

        # A leaf is a node whose 'child' is None; a missing 'child' key is treated the same way.
        child = tree.get('child')
        if child is not None:
            self.flatten_taxa_tree(child, flat)
        elif self.cf_flag:
            ranks = ['Species', 'Genus', 'Family', 'Order', 'Class', 'Phylum']
            # pair every rank from Genus -> Phylum with the rank immediately below it
            for rank_below, rank in zip(ranks, ranks[1:]):
                if tree['rank'] == rank:
                    flat[rank_below] = f'cf. {" ".join(self.cf_flag)}'  # add 'cf. [concept]' to the rank below

    def load_from_record(self, record: Dict):
        """
        Assigns concept values given JSON object.

        The scientific name is the record's name followed by the qualifiers collected while parsing the
        concept name, in this order: 'sp.', 'cf. [words]', 'nr. [words]', 'aff. [words]'.

        :param Dict record: The JSON object to load data from. 'AphiaID', 'scientificname' and 'rank'
            are required keys; 'authority' is only used when present and not None.
        """
        self.aphia_id = record['AphiaID']
        self.scientific_name = record['scientificname']
        self.taxon_rank = record['rank']

        if self.sp_flag:
            self.scientific_name += ' sp.'
        for marker, words in (('cf.', self.cf_flag), ('nr.', self.nr_flag), ('aff.', self.aff_flag)):
            if words:
                self.scientific_name += ' ' + ' '.join([marker, *words])

        # keep the existing authorship value when the record has no authority
        authority = record.get('authority')
        if authority is not None:
            self.authorship = authority

    def _cut_tail(self, start: int, skip: tuple) -> list:
        """
        Removes every word from position ``start`` to the end of ``self.concept_words`` (in place).

        :param int start: Index of the first word to remove.
        :param tuple skip: Marker words that are removed but not returned.
        :return: The removed words, in order, without the ones listed in ``skip``.
        """
        removed = self.concept_words[start:]
        del self.concept_words[start:]
        return [word for word in removed if word not in skip]

    def analyze_concept_name(self):
        """
        Analyzes 'extra bits' (eg 'cf', 'sp', '/') off the VARS concept name:

        EXAMPLE 1:
          VARS concept name = '[genus] [species] cf'
          'cf' means compare with, basically we're sure of the genus but not sure of species... but, it looks "close
          to" this species. We must only report the genus because it's the only thing we're sure of.
          We fetch the genus info from WoRMS and we save [species] and 'cf' locally to add back later.
          The record is populated with the genus info from WoRMS, and the final scientific name reported to DSCRTP is:
          [genus] cf. [species]
          This is the same for aff. (looks similar to) and nr. (near)

        EXAMPLE 2:
          VARS concept name = '[genus] cf sp'
          '[genus] cf' means we're pretty sure it's this genus, but not 100%.
          The 'sp' means it's a species in that genus.
          In this case, we don't care about the 'sp'.
          The record is populated with the FAMILY info from WoRMS (we need to get the genus's parent, then get the
          parent info)
          The final scientific name reported to DSCRTP is: 'cf. [genus]'

        Results are written to ``concept_words``, ``concept_add_words``, ``descriptors``, ``cf_flag``,
        ``nr_flag``, ``aff_flag`` and ``sp_flag``. ``concept_words`` is set to ``['NEED_PARENT']`` when the
        parent of the concept has to be looked up instead of the concept itself.
        """
        if '/' in self.concept_name:  # account for concepts with slashes in name, e.g. "Ptilella/Pennatula"
            self.concept_words = ['NEED_PARENT']
            return

        # create an array of the VARS concept name; split() without an argument collapses repeated,
        # leading and trailing whitespace so no empty-string "words" end up in the lists below
        self.concept_words = self.concept_name.split()
        if 'unidentified' in self.concept_words:
            self.concept_words.remove('unidentified')

        # [genus] [species] cf: entity is identified as species within [genus], similar to [species] but not sure
        # [genus] cf: entity is similar to [genus], but not sure
        # can also be [phylum] cf, [class] cf, [subclass] cf, [order] cf, [family] cf
        if 'cf' in self.concept_words:
            if 'sp' in self.concept_words:  # if sp is in this list, just remove it
                self.concept_words.remove('sp')
            cf_index = self.concept_words.index('cf')  # get where cf is in the list

            # if cf is the second item in the list, we need to query worms for concept's PARENT
            if cf_index == 1:  # this is the '[genus] cf' case
                del self.concept_words[1]  # remove cf from list
                self.cf_flag = self.concept_words  # we'll use this list to add words back at the very end
                self.concept_words = ['NEED_PARENT']  # get the parent of this concept from HURL later
            else:  # this is the '[genus] [species] cf' case
                # NOTE(review): if 'cf' is the very first word, cf_index - 1 is -1 and wraps around to the
                # last word - confirm whether such concept names can occur.
                # move the word before cf to concept_add_words
                self.concept_add_words.append(self.concept_words.pop(cf_index - 1))
                del self.concept_words[cf_index - 1]  # delete cf from the list
                # NOTE(review): cf_flag and concept_add_words are the same list object from here on
                self.cf_flag = self.concept_add_words

        # [genus] sp: entity is identified as a species within the genus [genus], but the species is unknown
        elif 'sp' in self.concept_words and 'n' not in self.concept_words:
            self.sp_flag = True  # set this to true so we can add 'sp.' at the end
            # get rid of all list items after sp (including sp); everything except sp becomes a descriptor
            sp_index = self.concept_words.index('sp')
            self.descriptors.extend(self._cut_tail(sp_index, ('sp',)))

        # [genus] nr [species] &opt[subspecies]: identified as species within [genus], similar to [species] but not sure
        if 'nr' in self.concept_words:
            # get rid of all list items after nr (including nr); everything except nr goes to nr_flag
            nr_index = self.concept_words.index('nr')
            self.nr_flag.extend(self._cut_tail(nr_index, ('nr',)))

        # [genus] aff [species]: identified as species within [genus], similar to [species] but not sure
        # same as nr
        if 'aff' in self.concept_words:
            # get rid of all list items after aff (including aff); everything except aff goes to aff_flag
            aff_index = self.concept_words.index('aff')
            self.aff_flag.extend(self._cut_tail(aff_index, ('aff',)))

        # [genus] n sp -> scientific name: '[genus]', descriptors: 'Undescribed species'
        # [genus] (n subgenus) n sp -> scientific name: '[genus]', descriptors: 'Undescribed subgenus, undescribed species'
        # [family] n gen -> scientific name: '[family]', descriptors: 'Undescribed genus'
        if 'n' in self.concept_words:
            n_index = self.concept_words.index('n')  # get where n is in the list
            if 'gen' in self.concept_words:
                self.descriptors.append('Undescribed genus')
            elif 'subgenus)' in self.concept_words:
                n_index = self.concept_words.index('(n')  # cut from the opening '(n' instead
                self.descriptors.append('Undescribed subgenus, undescribed species')
            else:
                self.descriptors.append('Undescribed species')

            # get rid of all list items after n (including n); append all items except
            # n, sp, gen, subgenus) and (n to the descriptors list
            self.descriptors.extend(self._cut_tail(n_index, ('n', 'sp', 'gen', 'subgenus)', '(n')))

Coverage for concept / concept.py: 82%

99 statements