Coverage for application/util/phylogeny_cache.py: 68%
65 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-23 05:22 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-23 05:22 +0000
1import json
2import os
4import requests
6from application.util.constants import TERM_RED, TERM_YELLOW, TERM_NORMAL
7from application.util.functions import flatten_taxa_tree
# Relative path of the JSON file the phylogeny cache is loaded from and saved to.
CACHE_PATH = os.path.join('cache', 'phylogeny.json')
# Base URL of the WoRMS REST service; endpoint paths are appended to it when fetching.
WORMS_REST_URL = 'https://www.marinespecies.org/rest'
class PhylogenyCache:
    """
    Cache of taxonomic lineages keyed by concept / scientific name, persisted as JSON at CACHE_PATH.

    Entries fetched from VARS are dicts of rank -> taxon name. Entries fetched from WoRMS are whatever
    flatten_taxa_tree returns, plus an 'aphia_id' key.
    """

    # Number of first-child hops from the root of a VARS "phylogeny/up" response to the phylum node.
    _VARS_PHYLUM_DEPTH = 5

    def __init__(self):
        self.data = {}
        self.load()

    def load(self):
        """
        Loads the cache from CACHE_PATH. Starts with a default cache if the file does not exist.
        """
        try:
            with open(CACHE_PATH, 'r') as f:
                self.data = json.load(f)
        except FileNotFoundError:
            self.data = {'Animalia': {}}

    def save(self):
        """
        Writes the cache to CACHE_PATH as indented JSON, creating the parent directory if needed.
        """
        cache_dir = os.path.dirname(CACHE_PATH)
        if cache_dir:
            # derive the directory from CACHE_PATH so the two can never drift apart
            os.makedirs(cache_dir, exist_ok=True)
        with open(CACHE_PATH, 'w') as f:
            json.dump(self.data, f, indent=2)

    @staticmethod
    def _descend_first_children(tree: dict, depth: int) -> dict:
        """
        Follows the first child of each node `depth` times and returns the node reached.

        Raises KeyError, IndexError or TypeError if the tree is shallower than `depth` or malformed.
        """
        node = tree
        for _ in range(depth):
            node = node['children'][0]
        return node

    @staticmethod
    def _collect_vars_ranks(node: dict) -> dict:
        """
        Walks down the first-child chain starting at `node` and returns a dict of rank -> name.

        Nodes without a 'rank' key are skipped. The walk stops at the first node that has no
        (or an empty) 'children' list.
        """
        ranks = {}
        while True:
            if 'rank' in node:  # sometimes it's not
                ranks[node['rank']] = node['name']
            children = node.get('children')
            if not children:
                return ranks
            node = children[0]

    def fetch_vars(self, concept_name: str, vars_kb_url: str, no_match_records: set):
        """
        Fetches phylogeny for a given concept from the VARS knowledge base.

        On success the lineage (phylum downwards) is stored in self.data[concept_name]. If the response
        does not contain a tree deep enough to reach phylum, the concept is added to no_match_records
        (with a warning the first time) and the cache is left untouched.
        """
        print(f'Fetching phylogeny for "{concept_name}" from VARS')
        vars_tax_res = requests.get(url=f'{vars_kb_url}/phylogeny/up/{concept_name.replace("/", "%2F")}')
        if vars_tax_res.status_code != 200:
            print(f'\n{TERM_RED}Unable to find record for {concept_name}{TERM_NORMAL}')
            return
        try:
            # this gets us to phylum
            phylum_node = self._descend_first_children(vars_tax_res.json(), self._VARS_PHYLUM_DEPTH)
        except (KeyError, IndexError, TypeError, ValueError):
            # a missing key, an empty children list, or a non-JSON / non-dict body all mean "no usable tree"
            if concept_name not in no_match_records:
                no_match_records.add(concept_name)
                print(f'{TERM_YELLOW}WARNING: Could not find phylogeny for concept "{concept_name}" in VARS knowledge base{TERM_NORMAL}')
            return
        self.data[concept_name] = self._collect_vars_ranks(phylum_node)

    def _store_worms_classification(self, scientific_name: str, aphia_id) -> bool:
        """
        Fetches the WoRMS classification for `aphia_id` and caches it under `scientific_name`.

        Returns True if the classification was fetched and stored, False otherwise.
        """
        worms_tree_res = requests.get(url=f'{WORMS_REST_URL}/AphiaClassificationByAphiaID/{aphia_id}')
        if worms_tree_res.status_code != 200:
            return False
        lineage = flatten_taxa_tree(worms_tree_res.json(), {})
        lineage['aphia_id'] = aphia_id
        self.data[scientific_name] = lineage
        return True

    def fetch_worms(self, scientific_name: str) -> bool:
        """
        Fetches phylogeny for a given scientific name from WoRMS. Returns True if successful, False otherwise.

        "Successful" means an entry was actually written to self.data[scientific_name].
        """
        print(f'Fetching phylogeny for "{scientific_name}" from WoRMS')
        worms_id_res = requests.get(url=f'{WORMS_REST_URL}/AphiaIDByName/{scientific_name}?marine_only=true')
        # parse the body once; a non-200 response is treated the same as "no unique match"
        aphia_id = worms_id_res.json() if worms_id_res.status_code == 200 else None
        if aphia_id is not None and aphia_id != -999:  # -999 means more than one matching record
            if self._store_worms_classification(scientific_name, aphia_id):
                return True
            print(f'{TERM_RED}Unable to fetch WoRMS classification for "{scientific_name}"{TERM_NORMAL}')
            return False

        worms_name_res = requests.get(url=f'{WORMS_REST_URL}/AphiaRecordsByName/{scientific_name}?like=false&marine_only=true&offset=1')
        records = worms_name_res.json() if worms_name_res.status_code == 200 else []
        # take the first accepted record whose classification can be stored
        for record in records:
            if record.get('status') != 'accepted':
                continue
            if self._store_worms_classification(scientific_name, record['AphiaID']):
                return True
        print(f'{TERM_RED}No accepted record found for concept name "{scientific_name}"{TERM_NORMAL}')
        return False