Coverage for concept/concept_handler.py: 91%

207 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-12 17:57 +0000

1import sys 

2import requests 

3 

4from typing import Dict 

5 

6from util.constants import NULL_VAL_STRING 

7from concept.concept import Concept 

8from util.terminal_output import Color 

9 

10 

class ConceptHandler:
    """
    Handles all API requests required to populate Concept fields. Prints status info to terminal.

    Two services are queried: WoRMS (marinespecies.org) for name records, the taxon tree and vernacular
    names, and the VARS knowledge base for phylogeny and alternate names.
    """

    # base URLs of the two services this handler talks to
    _WORMS_REST = 'https://www.marinespecies.org/rest'
    _VARS_KB = 'http://hurlstor.soest.hawaii.edu:8083/v1'
    # query string appended to every AphiaRecordsByName request
    _WORMS_NAME_QUERY = '?like=false&marine_only=true&offset=1'
    # concept names that are handled as eggs and never looked up
    # (more cases are needed here if more egg names are specified in VARS)
    _EGG_NAMES = ('eggs', 'eggcase')
    # ranks checked when looking for a common parent, most specific first
    _RANKS_LOW_TO_HIGH = (
        'subspecies', 'species', 'subgenus', 'genus', 'subfamily', 'family', 'suborder',
        'order', 'subclass', 'class', 'phylum', 'kingdom',
    )

    def __init__(self, concept: Concept):
        """
        :param Concept concept: The concept object to update.
        """
        self.concept = concept  # concept to update
        self.phylum = ''  # necessary for confirming correct worms record
        self.found_worms_match = False  # to let user know if matching record has been found
        self.unaccepted_names = []  # keep track of these so we don't add them back at the end

        if 'NEED_PARENT' in concept.concept_words:
            self.find_parent()

    # ------------------------------------------------------------------ helpers

    def _is_egg(self) -> bool:
        """Tells whether the concept is one of the egg concepts that are never looked up."""
        return self.concept.concept_name in self._EGG_NAMES

    def _worms_name_url(self, words: list) -> str:
        """Builds the AphiaRecordsByName URL for a name given as a list of words."""
        return f'{self._WORMS_REST}/AphiaRecordsByName/' + '%20'.join(words) + self._WORMS_NAME_QUERY

    @staticmethod
    def _print_found():
        """Prints the green check mark used for every successful lookup column."""
        print(f'{Color.GREEN}{" ✓" : <15}{Color.END}', end='')
        sys.stdout.flush()

    def _fetch_vars_kingdom_node(self, url_name: str):
        """
        Requests the VARS phylogeny for a name.

        :param str url_name: The name exactly as it should appear in the URL.
        :return: The kingdom-level node of the response tree, or None if the status code is not 200.
        """
        res = requests.get(f'{self._VARS_KB}/phylogeny/up/{url_name}')
        if res.status_code != 200:
            return None
        # the first four levels of the response sit above kingdom; this gets us to kingdom
        return res.json()['children'][0]['children'][0]['children'][0]['children'][0]

    @staticmethod
    def _flatten_ancestors(node: dict) -> dict:
        """
        Flattens a single-branch tree into {rank: name}, walking down through the first child of each node.

        The bottom node (the one without children) is left out, so the result only holds ancestors.
        Nodes without a 'rank' are skipped.
        """
        flat = {}
        while node.get('children'):
            if 'rank' in node:
                flat[node['rank']] = node['name']
            node = node['children'][0]
        return flat

    def _fetch_flat_ancestors(self, name: str) -> dict:
        """Fetches the VARS phylogeny for a name and returns it flattened; empty if the request fails."""
        kingdom_node = self._fetch_vars_kingdom_node(name)
        if kingdom_node is None:
            print(f'Unable to find record for {name}')
            return {}
        return self._flatten_ancestors(kingdom_node)

    @classmethod
    def _lowest_common_parent(cls, first_tree: dict, second_tree: dict):
        """
        Returns the name at the most specific rank where both flattened trees hold the same name.

        :return: The shared name, or None if the trees have nothing in common.
        """
        for rank in cls._RANKS_LOW_TO_HIGH:
            # sharing a rank is not enough: two genera from different families both have a 'family' entry
            if rank in first_tree and first_tree[rank] == second_tree.get(rank):
                return first_tree[rank]
        return None

    def _use_record(self, json_record: Dict):
        """Marks the lookup as successful and loads the concept from the given WoRMS record."""
        self._print_found()
        self.found_worms_match = True
        self.concept.load_from_record(json_record)

    def _fetch_valid_name_record(self, json_record: Dict):
        """Queries WoRMS for the 'valid_name' a record points to and processes the response."""
        valid_name = json_record['valid_name']
        if not valid_name:
            # NOTE(review): assumes WoRMS can send a null valid_name for some statuses - confirm.
            # Without this guard the URL concatenation below raises TypeError.
            print(f'{Color.RED}{"No match" : <15}{Color.END}')
            return

        print(f"{Color.BOLD}%-40s %-35s{Color.END}" % ('', valid_name), end='')
        sys.stdout.flush()
        req = requests.get(f'{self._WORMS_REST}/AphiaRecordsByName/' + valid_name + self._WORMS_NAME_QUERY)
        if req.status_code == 200:
            # pass the name as words; a plain str would be joined character by character
            self.find_accepted_record(req.json(), valid_name.split())
        else:
            print(f'{Color.RED}{"No match" : <15}{Color.END}')

    # --------------------------------------------------------------- public API

    def fetch_worms(self):
        """
        To easily call all WoRMS queries.
        """
        self.fetch_worms_aphia_record()
        self.fetch_worms_taxon_tree()
        self.fetch_worms_vernaculars()

    def fetch_worms_aphia_record(self):
        """
        Fetches concept info from WoRMS via API call with Scientific name:
        https://www.marinespecies.org/rest/AphiaRecordsByName/[SCIENTIFIC_NAME]?like=false&marine_only=true&offset=1

        The full name is tried first. If that gives no match, the name is built up one word at a time and each
        partial name is tried; words that give no match become descriptors.
        """
        concept = self.concept

        # if egg, don't bother checking
        if self._is_egg():
            concept.scientific_name = 'Animalia'
            concept.descriptors = ['egg case'] if concept.concept_name == 'eggcase' else [concept.concept_name]
            print("%-40s %-35s" % (concept.concept_name, 'None'))
            sys.stdout.flush()
            return

        print(f"{Color.BOLD}%-40s %-35s{Color.END}" %
              (concept.concept_name, " ".join(concept.concept_words)), end='')
        sys.stdout.flush()

        req = requests.get(self._worms_name_url(concept.concept_words))
        if req.status_code == 200:
            self.find_accepted_record(req.json(), concept.concept_words)
        else:
            print(f'{Color.YELLOW}{"No match" : <15}{Color.END}', end='')
            # Check for extra bits
            for i, word in enumerate(concept.concept_words):
                if word == 'shrimp':
                    concept.concept_words[i] = 'Decapoda'
                    concept.descriptors.append('shrimp')
            # Then try search WoRMS for each word individually
            for word in concept.concept_words:
                concept.concept_add_words.append(word)
                # skip this query if the name is exactly the same as the first name we used
                if concept.concept_name == ' '.join(concept.concept_add_words):
                    break
                print(f"\n{Color.BOLD}%-40s %-35s{Color.END}" %
                      ('', " ".join(concept.concept_add_words)), end='')
                sys.stdout.flush()
                req = requests.get(self._worms_name_url(concept.concept_add_words))
                if req.status_code == 200:
                    self.find_accepted_record(req.json(), self.concept.concept_words)
                else:
                    print(f'{Color.YELLOW}{"No match" : <15}{Color.END}', end='')
                    concept.descriptors.append(word)
                    concept.concept_add_words.remove(word)

        # whatever is left over and is neither part of the scientific name nor a rejected name is a descriptor
        for word in concept.concept_add_words:
            if word not in concept.scientific_name and word not in self.unaccepted_names:
                concept.descriptors.append(word)

    def find_parent(self):
        """
        Gets concept's parent from VARS kb:
        http://hurlstor.soest.hawaii.edu:8083/v1/phylogeny/up/[VARS_CONCEPT_NAME]

        The result replaces concept.concept_words, i.e. it becomes the name used to query WoRMS.
        """
        concept_name = self.concept.concept_name

        if '/' not in concept_name:
            parent = NULL_VAL_STRING
            node = self._fetch_vars_kingdom_node(concept_name)
            if node is None:
                print(f'Unable to find record for {concept_name}')
            else:
                # get down to the bottom, remembering the node just above it
                parent_node = node
                while node.get('children'):
                    parent_node = node
                    node = node['children'][0]
                parent = parent_node['name']
            self.concept.concept_words = [parent]
            return

        # account for concepts with slashes in name, e.g. "Ptilella/Pennatula"
        # we'll find the lowest common parent and use that as the concept to get info for from WoRMS
        first_name, second_name = concept_name.split('/')[:2]
        first_tree = self._fetch_flat_ancestors(first_name)
        if second_name == 'n genus':
            # if the second concept is 'n genus', just use the first concept
            second_tree = first_tree
        else:
            second_tree = self._fetch_flat_ancestors(second_name)

        common_parent = self._lowest_common_parent(first_tree, second_tree)
        if common_parent is None:
            print(f'Unable to find common parent for {concept_name}')
        else:
            self.concept.concept_words = [common_parent]

    def find_accepted_record(self, json_records: list, concept_words: list):
        """
        Picks the record to use out of a WoRMS AphiaRecordsByName response and passes it to check_status.

        Problem: There are multiple concepts with the same scientific name.
        e.g. Stolonifera: there is one concept named Stolonifera in phylum Bryozoa and another concept named
        Stolonifera in phylum Cnidaria. We want the one from Cnidaria, but its status is unaccepted (so we can't
        simply check the concept's status in the response JSON and use that concept).

        Solution: If there is more than one object in the response body, get the concept's phylum by doing a VARS
        API query with the concept name:
        http://hurlstor.soest.hawaii.edu:8083/v1/phylogeny/up/[VARS_CONCEPT_NAME]
        Use the object in the response whose phylum matches the VARS phylum. If there is more than one match, go
        with the match that is accepted.

        :param list json_records: A list of JSON objects returned by WoRMS that match the given concept name.
        :param list concept_words: The words of the name used to query VARS for the phylum.
        """
        if isinstance(concept_words, str):
            # tolerate a plain name: joining a str with '%20' would separate every single character
            concept_words = concept_words.split()

        if len(json_records) == 1:
            # there is only one record, use it
            self.check_status(json_records[0])
            return

        # there are multiple records - we need to ping vars for phylum and find the record that matches
        node = self._fetch_vars_kingdom_node('%20'.join(concept_words))
        if node is not None:
            # walk down until the phylum is found; stop at the bottom of the tree if there is none
            while not self.phylum and node.get('children'):
                node = node['children'][0]
                if node.get('rank') == 'phylum':
                    self.phylum = node['name']

        # records with matching phylum
        same_phylum = [record for record in json_records if record['phylum'] == self.phylum]

        # look for accepted record in matching phylum list
        for i, record in enumerate(same_phylum):
            if record['status'] == 'accepted':
                self.check_status(record)
                del same_phylum[i]
                break

        if not self.found_worms_match:
            if same_phylum:
                self.check_status(same_phylum[0])
            else:
                print(f'{Color.RED}{"No match" : <15}{Color.END}')

    def check_status(self, json_record: Dict):
        """
        Checks a record to see if it has a status of 'accepted'. If it does, it uses that record to load concept info.
        If it doesn't, it fetches the 'valid name' record that the unaccepted record points to.

        :param Dict json_record: The record to check.
        """
        status = json_record['status']

        if status == 'accepted':
            # status is accepted, use this record
            self._use_record(json_record)
        elif status == 'unaccepted':
            print(f'{Color.RED}Unaccepted{Color.END}')
            self.unaccepted_names.append(json_record['scientificname'])
            if json_record['valid_name'] == json_record['scientificname']:
                # There is at least one case in WoRMS where the record is unaccepted, but the "accepted name" is the
                # same as the current scientific name and the "valid aphia ID" is the same as the current aphia ID:
                # https://www.marinespecies.org/rest/AphiaRecordsByName/Acroechinoidea
                # In this case, we just go with the parent
                self.find_parent()
                self.fetch_worms_aphia_record()
            else:
                # fetch the valid name record
                self._fetch_valid_name_record(json_record)
        elif json_record['valid_name'] == json_record['scientificname']:
            # status is something other than accepted or unaccepted ('uncertain', 'alternate representation', etc)
            # but the valid name is the same as the current scientific name, just use this record
            self._use_record(json_record)
        else:
            # valid name is different from the current scientific name, fetch the valid name record
            print(f'{Color.YELLOW}{json_record["status"]}{Color.END}')
            self.unaccepted_names.append(json_record['scientificname'])
            self._fetch_valid_name_record(json_record)

    def fetch_worms_taxon_tree(self):
        """
        Pulls phylogeny/taxon tree info from WoRMS:
        https://www.marinespecies.org/rest/AphiaClassificationByAphiaID/[APHIA_ID]
        """
        if self._is_egg():
            self.concept.taxon_ranks = {'Kingdom': 'Animalia'}
            return

        if self.concept.scientific_name != NULL_VAL_STRING:
            req = requests.get(f'{self._WORMS_REST}/AphiaClassificationByAphiaID/{self.concept.aphia_id}')
            if req.status_code == 200:
                self.concept.flatten_taxa_tree(req.json(), self.concept.taxon_ranks)
                self._print_found()
                return

        # reached when there is no scientific name to look up or WoRMS returned no tree
        print(f'{Color.RED}{"No match" : <15}{Color.END}')

    def fetch_worms_vernaculars(self):
        """
        Fetches all english vernacular names for a given aphia ID from WoRMS:
        https://www.marinespecies.org/rest/AphiaVernacularsByAphiaID/[APHIA_ID]

        The names are stored on the concept as one string joined with ' | '.
        """
        if self._is_egg():
            return

        vern_names = NULL_VAL_STRING
        req = requests.get(f'{self._WORMS_REST}/AphiaVernacularsByAphiaID/{self.concept.aphia_id}')
        if req.status_code == 200:
            english = [record['vernacular'] for record in req.json() if record['language_code'] == 'eng']
            if english:
                vern_names = ' | '.join(english)
            self._print_found()
        else:
            print(f'{"None found" : <15}', end='')

        self.concept.vernacular_names = vern_names

    def fetch_vars_synonyms(self, warning_messages: list):
        """
        Fetches concept info from VARS kb:
        http://hurlstor.soest.hawaii.edu:8083/v1/concept/[VARS_CONCEPT_NAME]

        Gets synonyms and checks if concept name is an alternate (old) name. If it is, query WoRMS again.

        :param list warning_messages: The list of warning messages to display at the end of the script.
        """
        if self._is_egg():
            return

        lookup_name = self.concept.concept_name
        if '/' in lookup_name:
            lookup_name = ' '.join(self.concept.concept_words)  # use the parent we got earlier

        req = requests.get(f'{self._VARS_KB}/concept/{lookup_name.replace(" ", "%20")}')
        if req.status_code != 200:
            print('No match found')
            return

        json_obj = req.json()
        alternate_names = json_obj['alternateNames']

        # If the concept name we've been using is, in fact, an alternate name, WoRMS has to be queried again with
        # the correct name - unless the WoRMS query already returned the corrected name.
        if self.concept.concept_name in alternate_names and self.concept.scientific_name != json_obj['name']:
            print(f'{Color.YELLOW}Alternate name{Color.END}')
            updated_concept = Concept(concept_name=json_obj['name'])
            cons_handler = ConceptHandler(concept=updated_concept)
            cons_handler.fetch_worms()
            cons_handler.fetch_vars_synonyms(warning_messages=[])

            # take over everything that was looked up under the correct name
            for attr in ('scientific_name', 'aphia_id', 'authorship', 'synonyms', 'taxon_rank',
                         'taxon_ranks', 'descriptors', 'vernacular_names'):
                setattr(self.concept, attr, getattr(updated_concept, attr))
            warning_messages.append([
                '',
                self.concept.concept_name,
                '',
                f'Alternate concept name found - used "{json_obj["name"]}" instead'
            ])
            return

        # names starting with a lowercase letter are common names, not of interest
        # (slicing instead of indexing so an empty name is skipped rather than raising IndexError)
        nicknames = [syn for syn in alternate_names if syn[:1].isupper()]
        if nicknames:
            print(f'{Color.GREEN} ✓{Color.END}')
        else:
            print('None found')
        self.concept.synonyms = nicknames

353 self.concept.synonyms = nicknames 

354 else: 

355 print('No match found')