Coverage for concept/concept_handler.py: 91%
207 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-12 17:57 +0000
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-12 17:57 +0000
1import sys
2import requests
4from typing import Dict
6from util.constants import NULL_VAL_STRING
7from concept.concept import Concept
8from util.terminal_output import Color
class ConceptHandler:
    """
    Handles all API requests required to populate Concept fields. Prints status info to terminal.

    WoRMS supplies the Aphia record, taxon tree and vernacular names; the VARS knowledge base supplies
    phylogeny (used for parent lookups and phylum disambiguation) and alternate names.
    """

    _WORMS_URL = 'https://www.marinespecies.org/rest'
    _VARS_URL = 'http://hurlstor.soest.hawaii.edu:8083/v1'
    _WORMS_NAME_QUERY = '?like=false&marine_only=true&offset=1'
    # concept names that are filled in locally, without querying WoRMS or VARS
    # (more cases are needed here if more egg names are specified in VARS)
    _EGG_NAMES = ('eggs', 'eggcase')
    # ranks ordered from most to least specific, so the first shared one is the lowest common parent
    _RANKS = ('subspecies', 'species', 'subgenus', 'genus', 'subfamily', 'family', 'suborder',
              'order', 'subclass', 'class', 'phylum', 'kingdom')

    def __init__(self, concept: 'Concept'):
        """
        :param Concept concept: The concept object to update.
        """
        self.concept = concept  # concept to update
        self.phylum = ''  # necessary for confirming correct worms record
        self.found_worms_match = False  # to let user know if matching record has been found
        self.unaccepted_names = []  # keep track of these so we don't add them back at the end

        if 'NEED_PARENT' in concept.concept_words:
            self.find_parent()

    # ------------------------------------------------------------------ helpers

    def _is_egg(self) -> bool:
        """Returns True when the concept is one of the egg names that are never looked up remotely."""
        return self.concept.concept_name in self._EGG_NAMES

    @staticmethod
    def _print_status(color, text: str, end: str = ''):
        """Prints a colored status cell, left-aligned and padded to 15 characters."""
        print(f'{color}{text: <15}{Color.END}', end=end)

    def _fetch_vars_tree(self, name: str):
        """
        Requests the VARS phylogeny for a concept:
        http://hurlstor.soest.hawaii.edu:8083/v1/phylogeny/up/[VARS_CONCEPT_NAME]

        :param str name: The concept name exactly as it should appear in the URL.
        :return: The kingdom-level node of the response tree, or None if the status code was not 200.
        """
        res = requests.get(f'{self._VARS_URL}/phylogeny/up/{name}')
        if res.status_code != 200:
            return None
        # descending four levels gets us to kingdom
        return res.json()['children'][0]['children'][0]['children'][0]['children'][0]

    @staticmethod
    def _flatten_vars_tree(tree: dict) -> dict:
        """
        Walks down the first-child chain of a VARS tree and maps rank -> name for every node that has
        children. The bottom node (the concept itself) is deliberately not recorded.

        :param dict tree: The node to start from.
        :return: A dict of {rank: name}.
        """
        flat = {}
        node = tree
        while node.get('children'):
            if 'rank' in node:  # nodes without a rank cannot be matched by rank, skip them
                flat[node['rank']] = node['name']
            node = node['children'][0]
        return flat

    @classmethod
    def _lowest_common_parent(cls, tree1: dict, tree2: dict):
        """
        Finds the most specific rank at which two flattened trees hold the same name.

        :param dict tree1: Flattened tree ({rank: name}) of the first concept.
        :param dict tree2: Flattened tree ({rank: name}) of the second concept.
        :return: The shared name, or None if the trees have nothing in common.
        """
        for rank in cls._RANKS:
            # both trees having the rank is not enough - the names must agree for it to be a common parent
            if rank in tree1 and rank in tree2 and tree1[rank] == tree2[rank]:
                return tree1[rank]
        return None

    @staticmethod
    def _find_phylum(tree: dict) -> str:
        """
        Searches below the given node, along the first-child chain, for the node ranked 'phylum'.

        :param dict tree: The node to start from (its own rank is not inspected).
        :return: The phylum name, or '' if the bottom of the tree is reached without finding one.
        """
        node = tree
        while node.get('children'):
            node = node['children'][0]
            if node.get('rank') == 'phylum':
                return node['name']
        return ''

    def _query_worms_by_name(self, words: list) -> bool:
        """
        Queries WoRMS AphiaRecordsByName with the given words and hands any result to find_accepted_record.
        Prints a yellow "No match" cell when the status code is not 200.

        :param list words: The words that make up the name to search for.
        :return: True if WoRMS answered with status 200, False otherwise.
        """
        req = requests.get(f'{self._WORMS_URL}/AphiaRecordsByName/' + '%20'.join(words) + self._WORMS_NAME_QUERY)
        if req.status_code == 200:
            self.find_accepted_record(req.json(), self.concept.concept_words)
            return True
        self._print_status(Color.YELLOW, 'No match')
        return False

    def _use_record(self, json_record: Dict):
        """Prints a green check, marks the match as found and loads the record into the concept."""
        self._print_status(Color.GREEN, ' ✓')
        sys.stdout.flush()
        self.found_worms_match = True
        self.concept.load_from_record(json_record)

    def _fetch_valid_name_record(self, json_record: Dict):
        """
        Queries WoRMS for the 'valid_name' that a non-accepted record points to and processes the result.

        :param Dict json_record: The record whose valid name should be fetched.
        """
        valid_name = json_record['valid_name']
        print(f"{Color.BOLD}%-40s %-35s{Color.END}" % ('', valid_name), end='')
        sys.stdout.flush()
        req = requests.get(f'{self._WORMS_URL}/AphiaRecordsByName/' + valid_name + self._WORMS_NAME_QUERY)
        if req.status_code == 200:
            # find_accepted_record expects a list of words; passing the bare string would make it
            # join the individual characters when it builds the VARS URL
            self.find_accepted_record(req.json(), valid_name.split())
        else:
            self._print_status(Color.RED, 'No match', end='\n')

    # --------------------------------------------------------------- public API

    def fetch_worms(self):
        """
        To easily call all WoRMS queries.
        """
        self.fetch_worms_aphia_record()
        self.fetch_worms_taxon_tree()
        self.fetch_worms_vernaculars()

    def fetch_worms_aphia_record(self):
        """
        Fetches concept info from WoRMS via API call with Scientific name:
        https://www.marinespecies.org/rest/AphiaRecordsByName/[SCIENTIFIC_NAME]?like=false&marine_only=true&offset=1

        If the full name has no match, each word of the name is tried cumulatively; words that do not lead to
        a match are recorded as descriptors instead.
        """
        concept = self.concept

        # if egg, don't bother checking
        if self._is_egg():
            concept.scientific_name = 'Animalia'
            concept.descriptors = ['egg case'] if concept.concept_name == 'eggcase' else [concept.concept_name]
            print("%-40s %-35s" % (concept.concept_name, 'None'))
            sys.stdout.flush()
            return

        print(f"{Color.BOLD}%-40s %-35s{Color.END}" %
              (concept.concept_name, " ".join(concept.concept_words)), end='')
        sys.stdout.flush()

        if self._query_worms_by_name(concept.concept_words):
            return

        # Check for extra bits
        for index, word in enumerate(concept.concept_words):
            if word == 'shrimp':
                concept.concept_words[index] = 'Decapoda'
                concept.descriptors.append('shrimp')

        # Then try search WoRMS for each word individually
        for word in concept.concept_words:
            concept.concept_add_words.append(word)
            # skip this query if the name is exactly the same as the first name we used
            if concept.concept_name == ' '.join(concept.concept_add_words):
                break
            print(f"\n{Color.BOLD}%-40s %-35s{Color.END}" %
                  ('', " ".join(concept.concept_add_words)), end='')
            sys.stdout.flush()
            if not self._query_worms_by_name(concept.concept_add_words):
                # this word did not help, keep it as a descriptor and drop it from the search words
                concept.descriptors.append(word)
                concept.concept_add_words.remove(word)

        for word in concept.concept_add_words:
            if word not in concept.scientific_name and word not in self.unaccepted_names:
                concept.descriptors.append(word)

    def find_parent(self):
        """
        Gets concept's parent from VARS kb:
        http://hurlstor.soest.hawaii.edu:8083/v1/phylogeny/up/[VARS_CONCEPT_NAME]

        The result is stored in concept.concept_words. For names containing a slash, the lowest common parent
        of the first two parts is used; concept_words is left untouched if there is none.
        """
        name = self.concept.concept_name

        if '/' not in name:
            parent = NULL_VAL_STRING
            tree = self._fetch_vars_tree(name)
            if tree is None:
                print(f'Unable to find record for {self.concept.concept_name}')
            else:
                # get down to the bottom, remembering the node just above it
                above = tree
                node = tree
                while node.get('children'):
                    above = node
                    node = node['children'][0]
                parent = above['name']
            self.concept.concept_words = [parent]
            return

        # account for concepts with slashes in name, e.g. "Ptilella/Pennatula"
        # we'll find the lowest common parent and use that as the concept to get info for from WoRMS
        parts = name.split('/')
        first_name, second_name = parts[0], parts[1]

        # the first concept (eg Ptilella)
        first_flat = {}
        first_tree = self._fetch_vars_tree(first_name)
        if first_tree is None:
            print(f'Unable to find record for {first_name}')
        else:
            first_flat = self._flatten_vars_tree(first_tree)

        # the second concept (eg Pennatula)
        if second_name == 'n genus':
            # if the second concept is 'n genus', just use the first concept
            second_flat = first_flat
        else:
            second_flat = {}
            second_tree = self._fetch_vars_tree(second_name)
            if second_tree is None:
                print(f'Unable to find record for {second_name}')
            else:
                second_flat = self._flatten_vars_tree(second_tree)

        common = self._lowest_common_parent(first_flat, second_flat)
        if common is None:
            print(f'Unable to find common parent for {self.concept.concept_name}')
        else:
            self.concept.concept_words = [common]

    def find_accepted_record(self, json_records: list, concept_words: list):
        """
        Finds the matching record among the records returned by a WoRMS AphiaRecordsByName query.

        Problem: There are multiple concepts with the same scientific name.
        e.g. Stolonifera: there is one concept named Stolonifera in phylum Bryozoa and another concept named
        Stolonifera in phylum Cnidaria. We want the one from Cnidaria, but its status is unaccepted (so we can't
        simply check the concept's status in the response JSON and use that concept).

        Solution: If there is more than one object in the response body, get the concept's phylum by doing a VARS
        API query with the concept name. Use the object in the response whose phylum matches the VARS phylum. If
        there is more than one match, go with the match that is accepted.

        :param list json_records: A list of JSON objects returned by WoRMS that match the given concept name.
        :param list concept_words: The words we should use to query VARS for the phylum.
        """
        if len(json_records) == 1:
            # there is only one record, use it
            self.check_status(json_records[0])
            return

        if isinstance(concept_words, str):
            # tolerate a plain name; joining a string would otherwise separate its characters
            concept_words = concept_words.split()

        # there are multiple records - we need to ping vars for phylum and find the record that matches
        if not self.phylum:
            vars_tree = self._fetch_vars_tree('%20'.join(concept_words))
            if vars_tree is not None:
                self.phylum = self._find_phylum(vars_tree)

        # records with matching phylum
        candidates = [record for record in json_records if record['phylum'] == self.phylum]

        # prefer the accepted record within the matching phylum
        accepted = next((record for record in candidates if record['status'] == 'accepted'), None)
        if accepted is not None:
            self.check_status(accepted)

        if not self.found_worms_match:
            if candidates:
                self.check_status(candidates[0])
            else:
                self._print_status(Color.RED, 'No match', end='\n')

    def check_status(self, json_record: Dict):
        """
        Checks a record to see if it has a status of 'accepted'. If it does, it uses that record to load concept info.
        If it doesn't, it fetches the 'valid name' record that the unaccepted record points to.

        :param Dict json_record: The record to check.
        """
        status = json_record['status']

        if status == 'accepted':
            # status is accepted, use this record
            self._use_record(json_record)
        elif status == 'unaccepted':
            print(f'{Color.RED}Unaccepted{Color.END}')
            self.unaccepted_names.append(json_record['scientificname'])
            if json_record['valid_name'] == json_record['scientificname']:
                # There is at least one case in WoRMS where the record is unaccepted, but the "accepted name" is the
                # same as the current scientific name and the "valid aphia ID" is the same as the current aphia ID:
                # https://www.marinespecies.org/rest/AphiaRecordsByName/Acroechinoidea
                # In this case, we just go with the parent
                self.find_parent()
                self.fetch_worms_aphia_record()
            else:
                # fetch the valid name record
                self._fetch_valid_name_record(json_record)
        elif json_record['valid_name'] == json_record['scientificname']:
            # status is something other than accepted or unaccepted ('uncertain', 'alternate representation', etc)
            # but the valid name is the same as the current scientific name, just use this record
            self._use_record(json_record)
        else:
            # valid name is different from the current scientific name, fetch the valid name record
            print(f'{Color.YELLOW}{json_record["status"]}{Color.END}')
            self.unaccepted_names.append(json_record['scientificname'])
            self._fetch_valid_name_record(json_record)

    def fetch_worms_taxon_tree(self):
        """
        Pulls phylogeny/taxon tree info from WoRMS:
        https://www.marinespecies.org/rest/AphiaClassificationByAphiaID/[APHIA_ID]

        Nothing is requested when the concept has no scientific name.
        """
        if self._is_egg():
            self.concept.taxon_ranks = {'Kingdom': 'Animalia'}
            return

        if self.concept.scientific_name == NULL_VAL_STRING:
            return

        req = requests.get(f'{self._WORMS_URL}/AphiaClassificationByAphiaID/{self.concept.aphia_id}')
        if req.status_code == 200:
            self.concept.flatten_taxa_tree(req.json(), self.concept.taxon_ranks)
            self._print_status(Color.GREEN, ' ✓')
            sys.stdout.flush()
        else:
            self._print_status(Color.RED, 'No match', end='\n')

    def fetch_worms_vernaculars(self):
        """
        Fetches all english vernacular names for a given aphia ID from WoRMS:
        https://www.marinespecies.org/rest/AphiaVernacularsByAphiaID/[APHIA_ID]

        The names are stored on the concept as a single ' | '-separated string.
        """
        if self._is_egg():
            return

        vern_names = NULL_VAL_STRING
        req = requests.get(f'{self._WORMS_URL}/AphiaVernacularsByAphiaID/{self.concept.aphia_id}')
        if req.status_code == 200:
            english = [record['vernacular'] for record in req.json() if record['language_code'] == 'eng']
            if english:
                vern_names = ' | '.join(english)
            self._print_status(Color.GREEN, ' ✓')
            sys.stdout.flush()
        else:
            print(f'{"None found" : <15}', end='')

        self.concept.vernacular_names = vern_names

    def fetch_vars_synonyms(self, warning_messages: list):
        """
        Fetches concept info from VARS kb:
        http://hurlstor.soest.hawaii.edu:8083/v1/concept/[VARS_CONCEPT_NAME]

        Gets synonyms and checks if concept name is an alternate (old) name. If it is, query WoRMS again.

        :param list warning_messages: The list of warning messages to display at the end of the script.
        """
        if self._is_egg():
            return

        temp_name = self.concept.concept_name
        if '/' in temp_name:
            temp_name = ' '.join(self.concept.concept_words)  # use the parent we got earlier

        req = requests.get(f'{self._VARS_URL}/concept/{temp_name.replace(" ", "%20")}')
        if req.status_code != 200:
            print('No match found')
            return

        json_obj = req.json()
        alternate_names = json_obj['alternateNames']

        # the concept name we've been using may, in fact, be an alternate name; if the WoRMS query already
        # returned the corrected name there is nothing to redo
        if self.concept.concept_name in alternate_names and self.concept.scientific_name != json_obj['name']:
            print(f'{Color.YELLOW}Alternate name{Color.END}')
            # we need to query worms for the correct concept name
            updated_concept = Concept(concept_name=json_obj['name'])
            cons_handler = ConceptHandler(concept=updated_concept)
            cons_handler.fetch_worms()
            cons_handler.fetch_vars_synonyms(warning_messages=[])

            for field in ('scientific_name', 'aphia_id', 'authorship', 'synonyms', 'taxon_rank',
                          'taxon_ranks', 'descriptors', 'vernacular_names'):
                setattr(self.concept, field, getattr(updated_concept, field))
            warning_messages.append([
                '',
                self.concept.concept_name,
                '',
                f'Alternate concept name found - used "{json_obj["name"]}" instead'
            ])
            return

        # names starting with a lowercase letter are common names, not of interest
        # (slicing instead of indexing so an empty name is skipped rather than raising)
        nicknames = [syn for syn in alternate_names if syn[:1].isupper()]
        if nicknames:
            print(f'{Color.GREEN} ✓{Color.END}')
        else:
            print('None found')
        self.concept.synonyms = nicknames