Coverage for application / util / phylogeny_cache.py: 68%

65 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-23 05:22 +0000

1import json 

2import os 

3 

4import requests 

5 

6from application.util.constants import TERM_RED, TERM_YELLOW, TERM_NORMAL 

7from application.util.functions import flatten_taxa_tree 

8 

# Location of the on-disk cache file, relative to the current working directory.
CACHE_PATH = os.path.join('cache', 'phylogeny.json')
# Base URL of the WoRMS REST service queried by PhylogenyCache.fetch_worms.
WORMS_REST_URL = 'https://www.marinespecies.org/rest'


class PhylogenyCache:
    """
    Dictionary of phylogeny records that is persisted as JSON at CACHE_PATH.

    ``data`` maps a concept name / scientific name to a dict describing its taxonomy:
    entries written by ``fetch_vars`` map rank -> name; entries written by
    ``fetch_worms`` hold whatever ``flatten_taxa_tree`` returns plus an ``'aphia_id'`` key.
    """

    # Number of ``['children'][0]`` hops from the root of a VARS ``phylogeny/up``
    # response down to the phylum node.
    _VARS_LEVELS_TO_PHYLUM = 5

    def __init__(self):
        self.data = {}
        self.load()

    def load(self) -> None:
        """
        Reads the cache file into ``self.data``.

        If the file does not exist yet, ``self.data`` is seeded with ``{'Animalia': {}}``.
        A file that exists but is not valid JSON still raises ``json.JSONDecodeError``.
        """
        try:
            with open(CACHE_PATH, 'r', encoding='utf-8') as f:
                self.data = json.load(f)
        except FileNotFoundError:
            self.data = {'Animalia': {}}

    def save(self) -> None:
        """
        Writes ``self.data`` to the cache file as indented JSON, creating the
        cache directory first if it is missing.
        """
        # derive the directory from CACHE_PATH so the two can never drift apart
        cache_dir = os.path.dirname(CACHE_PATH)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        with open(CACHE_PATH, 'w', encoding='utf-8') as f:
            json.dump(self.data, f, indent=2)

    @staticmethod
    def _collect_ranks(vars_tree) -> dict:
        """
        Walks a VARS tree along the first child of every node and returns a dict of rank -> name.
        """
        ranks = {}
        node = vars_tree
        while isinstance(node, dict):
            if 'rank' in node:  # sometimes it's not
                ranks[node['rank']] = node['name']
            children = node.get('children')
            # an absent or empty child list means we reached the leaf
            node = children[0] if children else None
        return ranks

    def fetch_vars(self, concept_name: str, vars_kb_url: str, no_match_records: set):
        """
        Fetches phylogeny for a given concept from the VARS knowledge base.

        On success, ``self.data[concept_name]`` is replaced with a dict of rank -> name
        covering phylum and below. If the response tree is too shallow to reach phylum,
        nothing is stored, the concept is added to ``no_match_records`` and a warning is
        printed (only the first time that concept is seen). If the request does not return
        HTTP 200, an error is printed and nothing is stored.
        """
        print(f'Fetching phylogeny for "{concept_name}" from VARS')
        vars_tax_res = requests.get(url=f'{vars_kb_url}/phylogeny/up/{concept_name.replace("/", "%2F")}')
        if vars_tax_res.status_code != 200:
            print(f'\n{TERM_RED}Unable to find record for {concept_name}{TERM_NORMAL}')
            return

        vars_tree = vars_tax_res.json()
        try:
            # this gets us to phylum
            for _ in range(self._VARS_LEVELS_TO_PHYLUM):
                vars_tree = vars_tree['children'][0]
        except (KeyError, IndexError, TypeError):
            # missing 'children', empty child list, or a node that is not a dict
            if concept_name not in no_match_records:
                no_match_records.add(concept_name)
                print(f'{TERM_YELLOW}WARNING: Could not find phylogeny for concept "{concept_name}" in VARS knowledge base{TERM_NORMAL}')
            return

        self.data[concept_name] = self._collect_ranks(vars_tree)

    def _store_worms_classification(self, scientific_name: str, aphia_id) -> bool:
        """
        Fetches the WoRMS classification for ``aphia_id`` and caches it under ``scientific_name``.
        Returns True if the record was stored, False if the request did not return HTTP 200.
        """
        worms_tree_res = requests.get(url=f'{WORMS_REST_URL}/AphiaClassificationByAphiaID/{aphia_id}')
        if worms_tree_res.status_code != 200:
            return False
        self.data[scientific_name] = flatten_taxa_tree(worms_tree_res.json(), {})
        self.data[scientific_name]['aphia_id'] = aphia_id
        return True

    def fetch_worms(self, scientific_name: str) -> bool:
        """
        Fetches phylogeny for a given scientific name from WoRMS. Returns True if successful, False otherwise.

        "Successful" means a classification was actually stored in ``self.data[scientific_name]``.
        The name is first resolved to a single AphiaID; if that is not possible, the name's
        records are listed and the first one with status ``'accepted'`` is used.
        """
        print(f'Fetching phylogeny for "{scientific_name}" from WoRMS')
        worms_id_res = requests.get(url=f'{WORMS_REST_URL}/AphiaIDByName/{scientific_name}?marine_only=true')
        if worms_id_res.status_code == 200:
            aphia_id = worms_id_res.json()
            if aphia_id != -999:  # -999 means more than one matching record
                return self._store_worms_classification(scientific_name, aphia_id)

        worms_name_res = requests.get(url=f'{WORMS_REST_URL}/AphiaRecordsByName/{scientific_name}?like=false&marine_only=true&offset=1')
        records = worms_name_res.json() if worms_name_res.status_code == 200 else []
        # just take the first accepted record
        for record in records or []:
            if record['status'] == 'accepted':
                return self._store_worms_classification(scientific_name, record['AphiaID'])

        print(f'{TERM_RED}No accepted record found for concept name "{scientific_name}"{TERM_NORMAL}')
        return False