Coverage for application/qaqc/vars/routes.py: 10%

155 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-23 02:22 +0000

1""" 

2VARS-specific QA/QC endpoints 

3 

4/qaqc/vars/checklist [GET, PATCH] 

5/qaqc/vars/check/<check> [GET] 

6/qaqc/vars/quick-check/<check> [GET] 

7""" 

8 

9import threading 

10 

11import requests 

12from flask import current_app, render_template, request, session 

13 

14from . import vars_qaqc_bp 

15from .vars_qaqc_processor import VarsQaqcProcessor 

16from application.util.constants import TERM_NORMAL, TERM_RED 

17 

18 

# qaqc checklist page for vars
@vars_qaqc_bp.get('/checklist')
def vars_qaqc_checklist():
    """
    Render the VARS QA/QC checklist page for the requested sequences.

    Reads every `sequence` query parameter, fetches the saved checklist from
    the external review server, tallies annotation counts for each sequence on
    its own thread, and renders `qaqc/vars/qaqc-checklist.html`.

    If the checklist cannot be fetched (non-200 status, request failure, or a
    body that is not valid JSON), an empty checklist is rendered instead.
    """
    sequences = request.args.getlist('sequence')
    total_counts = {
        'annotations': 0,
        'individuals': 0,
        'true_localizations': 0,  # number of bounding box associations in dive
        'group_localizations': 0,  # number of annotations marked 'group: localization'
    }
    checklist = {}
    try:
        with requests.get(
            url=f'{current_app.config.get("DARC_REVIEW_URL")}/vars-qaqc-checklist/{"&".join(sequences)}',
            headers=current_app.config.get('DARC_REVIEW_HEADERS'),
        ) as checklist_res:
            if checklist_res.status_code == 200:
                checklist = checklist_res.json()
            else:
                print('ERROR: Unable to get QAQC checklist from external review server')
    except (requests.RequestException, ValueError):
        # connection failures and undecodable bodies get the same fallback as a non-200 status
        print('ERROR: Unable to get QAQC checklist from external review server')
    # get counts
    # the config value is read here and passed in, rather than read from current_app in the workers
    vars_dive_url = current_app.config.get('VARS_DIVE_QUERY_URL')
    threads = [
        threading.Thread(
            target=get_sequence_counts,
            args=(sequence, total_counts, vars_dive_url),
        )
        for sequence in sequences
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    # a request without any `sequence` parameter previously raised IndexError here
    if not sequences:
        tab_title = ''
    elif len(sequences) == 1:
        tab_title = sequences[0]
    else:
        # first sequence name plus the last space-separated token of the final one
        tab_title = f'{sequences[0]} - {sequences[-1].split(" ")[-1]}'
    return render_template(
        'qaqc/vars/qaqc-checklist.html',
        annotation_count=total_counts['annotations'],
        individual_count=total_counts['individuals'],
        true_localization_count=total_counts['true_localizations'],
        group_localization_count=total_counts['group_localizations'],
        checklist=checklist,
        tab_title=tab_title,
    )

58 

59 

# serializes updates to the shared totals dict; vars_qaqc_checklist runs
# get_sequence_counts on one thread per sequence with the same dict
_TOTAL_COUNTS_LOCK = threading.Lock()

# number of individuals added for each recognised categorical-abundance value;
# any other non-empty value adds nothing
_CAT_ABUNDANCE_INDIVIDUALS = {
    '11-20': 15,
    '21-50': 35,
    '51-100': 75,
    '>100': 100,
}


def _count_annotations(annotations):
    """Tally one sequence's annotations into a dict keyed like `total_counts`."""
    counts = {
        'annotations': len(annotations),
        'individuals': 0,
        'true_localizations': 0,
        'group_localizations': 0,
    }
    identity_references = set()
    for annotation in annotations:
        concept = annotation['concept']
        if len(concept) == 0 or concept[0].islower():
            # ignore non-taxonomic concepts
            continue
        if annotation.get('group') == 'localization':
            counts['true_localizations'] += 1
            counts['group_localizations'] += 1
            continue
        id_ref = None
        cat_abundance = None
        pop_quantity = None
        for association in annotation['associations']:
            link_name = association['link_name']
            if link_name == 'identity-reference':
                id_ref = association['link_value']
            elif link_name == 'categorical-abundance':
                cat_abundance = association['link_value']
            elif link_name == 'population-quantity':
                pop_quantity = association['link_value']
            elif link_name == 'bounding box':
                counts['true_localizations'] += 1
        if id_ref:
            if id_ref in identity_references:
                # an identity reference only contributes individuals the first time it is seen
                continue
            identity_references.add(id_ref)
        if cat_abundance:
            counts['individuals'] += _CAT_ABUNDANCE_INDIVIDUALS.get(cat_abundance, 0)
            continue
        if pop_quantity:
            counts['individuals'] += int(pop_quantity)
            continue
        counts['individuals'] += 1
    return counts


def get_sequence_counts(sequence_name, total_counts, vars_dive_url):
    """
    Fetch one sequence's annotations and add its counts to `total_counts`.

    :param sequence_name: name of the sequence; spaces are sent as %20
    :param total_counts: dict with the keys 'annotations', 'individuals',
        'true_localizations' and 'group_localizations'; updated in place
    :param vars_dive_url: base URL that the sequence name is appended to

    Returns None. If the fetch does not return status 200, an error is printed
    and `total_counts` is left untouched.
    """
    res = requests.get(f'{vars_dive_url}/{sequence_name.replace(" ", "%20")}')
    if res.status_code != 200:
        print(f'{TERM_RED}Failed to fetch annotations for sequence {sequence_name}{TERM_NORMAL}')
        # previously fell through and tried to parse the error response anyway
        return
    sequence_counts = _count_annotations(res.json()['annotations'])
    # `d[k] += v` is a read-modify-write, so concurrent callers must not interleave here
    with _TOTAL_COUNTS_LOCK:
        for key, value in sequence_counts.items():
            total_counts[key] += value

115 

116 

# update vars qaqc checklist
@vars_qaqc_bp.patch('/checklist')
def patch_vars_qaqc_checklist():
    """
    Forward a checklist update to the external review server.

    Expects a JSON object with a `sequences` entry; that value is placed in
    the upstream URL and every remaining key is sent on as the PATCH body.

    Returns `({}, 400)` when the body is not a JSON object or `sequences` is
    missing or empty. Otherwise returns the upstream JSON body and status
    code, with `{}` substituted when the upstream body is not valid JSON.
    """
    req_json = request.json
    if not isinstance(req_json, dict):
        # a JSON list, string or null has no .get and previously caused a server error
        return {}, 400
    sequences = req_json.pop('sequences', None)
    if not sequences:
        return {}, 400
    res = requests.patch(
        url=f'{current_app.config.get("DARC_REVIEW_URL")}/vars-qaqc-checklist/{sequences}',
        headers=current_app.config.get('DARC_REVIEW_HEADERS'),
        json=req_json,
    )
    try:
        res_body = res.json()
    except ValueError:
        # upstream replied with an empty or non-JSON body; still relay its status code
        res_body = {}
    return res_body, res.status_code

131 

132 

# maps each supported <check> to the VarsQaqcProcessor method that runs it
# and the page title displayed with its results
_VARS_QAQC_CHECKS = {
    'multiple-associations': (
        'find_duplicate_associations',
        'Records with multiples of the same association other than s2',
    ),
    'missing-primary-substrate': (
        'find_missing_s1',
        'Records missing primary substrate',
    ),
    'identical-s1-&-s2': (
        'find_identical_s1_s2',
        'Records with identical primary and secondary substrates',
    ),
    'duplicate-s2': (
        'find_duplicate_s2',
        'Records with duplicate secondary substrates',
    ),
    'missing-upon-substrate': (
        'find_missing_upon_substrate',
        'Records missing a substrate that it is recorded "upon"',
    ),
    'mismatched-substrates': (
        'find_mismatched_substrates',
        'Records occurring at the same timestamp with mismatched substrates',
    ),
    'missing-upon': (
        'find_missing_upon',
        'Records other than "none" missing "upon"',
    ),
    'missing-ancillary-data': (
        'find_missing_ancillary_data',
        'Records missing ancillary data',
    ),
    'id-ref-concept-name': (
        'find_id_refs_different_concept_name',
        'Records with the same ID reference that have different concept names',
    ),
    'id-ref-associations': (
        'find_id_refs_conflicting_associations',
        'Records with the same ID reference that have conflicting associations',
    ),
    'blank-associations': (
        'find_blank_associations',
        'Records with blank association link values',
    ),
    'suspicious-hosts': (
        'find_suspicious_hosts',
        'Records with suspicious hosts',
    ),
    'expected-associations': (
        'find_missing_expected_association',
        'Records expected to be associated with an organism but "upon" is inanimate',
    ),
    'host-associate-time-diff': (
        'find_long_host_associate_time_diff',
        'Records where "upon" occurred more than one minute ago or cannot be found',
    ),
    'localizations-missing-bounding-box': (
        'find_localizations_without_bounding_boxes',
        'Records in the "localization" group that do not contain a "bounding box" association '
        '(also displays records not in the "localization" group that contain a "bounding box" '
        'association)',
    ),
    'number-of-bounding-boxes': (
        'find_num_bounding_boxes',
        'Number of bounding boxes for each unique concept',
    ),
}


# individual qaqc checks (VARS)
@vars_qaqc_bp.get('/check/<check>')
def vars_qaqc(check):
    """
    Run a single VARS QA/QC check and render its results page.

    :param check: check name taken from the URL; either 'unique-fields' or a
        key of `_VARS_QAQC_CHECKS`

    'unique-fields' renders `qaqc/vars/qaqc-unique.html`; every other known
    check renders `qaqc/vars/qaqc.html`. An unknown check returns the 404
    page, matching the quick-check endpoint, instead of rendering a results
    page with no title.
    """
    if check != 'unique-fields' and check not in _VARS_QAQC_CHECKS:
        return render_template('errors/404.html', err=''), 404
    sequences = request.args.getlist('sequence')
    qaqc_annos = VarsQaqcProcessor(
        sequence_names=sequences,
        vars_dive_url=current_app.config.get('VARS_DIVE_QUERY_URL'),
        vars_phylogeny_url=current_app.config.get('VARS_PHYLOGENY_URL'),
    )
    # a request without any `sequence` parameter previously raised IndexError here
    if not sequences:
        tab_title = ''
    elif len(sequences) == 1:
        tab_title = sequences[0]
    else:
        # first sequence name plus the last space-separated token of the final one
        tab_title = f'{sequences[0]} - {sequences[-1].split(" ")[-1]}'
    title = check.replace('-', ' ').title()
    data = {
        'concepts': session.get('vars_concepts', []),
        'title': title,
        'tab_title': f'{tab_title} {title}',
    }
    if check == 'unique-fields':
        qaqc_annos.find_unique_fields()
        data['unique_list'] = qaqc_annos.final_records
        return render_template('qaqc/vars/qaqc-unique.html', data=data)
    method_name, page_title = _VARS_QAQC_CHECKS[check]
    # looked up by name on the instance; results are then read from final_records
    getattr(qaqc_annos, method_name)()
    data['page_title'] = page_title
    data['annotations'] = qaqc_annos.final_records
    return render_template('qaqc/vars/qaqc.html', data=data)

205 

206 

@vars_qaqc_bp.get('/quick-check/<check>')
def qaqc_quick(check):
    """
    Return a lightweight JSON summary for a VARS QA/QC check.

    :param check: check name taken from the URL; only
        'missing-ancillary-data' is supported

    Responds with `{'num_records': ...}` and status 200 for the supported
    check, and with the 404 error page for any other name.
    """
    processor = VarsQaqcProcessor(
        sequence_names=request.args.getlist('sequence'),
        vars_dive_url=current_app.config.get('VARS_DIVE_QUERY_URL'),
        vars_phylogeny_url=current_app.config.get('VARS_PHYLOGENY_URL'),
    )
    if check == 'missing-ancillary-data':
        num_missing = processor.get_num_records_missing_ancillary_data()
        return {'num_records': num_missing}, 200
    return render_template('errors/404.html', err=''), 404