Coverage for application/qaqc/vars/routes.py: 10%
155 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-23 02:22 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-23 02:22 +0000
1"""
2VARS-specific QA/QC endpoints
4/qaqc/vars/checklist [GET, PATCH]
5/qaqc/vars/check/<check> [GET]
6/qaqc/vars/quick-check/<check> [GET]
7"""
9import threading
11import requests
12from flask import current_app, render_template, request, session
14from . import vars_qaqc_bp
15from .vars_qaqc_processor import VarsQaqcProcessor
16from application.util.constants import TERM_NORMAL, TERM_RED
# qaqc checklist page for vars
@vars_qaqc_bp.get('/checklist')
def vars_qaqc_checklist():
    """
    Render the VARS QA/QC checklist page for the requested sequences.

    Query parameters:
        sequence: one or more sequence names (repeat the parameter for several).

    The saved checklist is requested from the external review server
    (``DARC_REVIEW_URL``); if that request does not return 200 an empty
    checklist is rendered instead. Annotation/individual/localization counts
    are gathered with one thread per sequence via ``get_sequence_counts``.

    Returns:
        The rendered ``qaqc/vars/qaqc-checklist.html`` page, or a plain-text
        400 response when no ``sequence`` parameter was supplied.
    """
    sequences = request.args.getlist('sequence')
    if not sequences:
        # The tab title below indexes sequences[0]; without this guard a request
        # that omits "sequence" fails with an IndexError (HTTP 500).
        return 'At least one "sequence" query parameter is required', 400
    count_keys = (
        'annotations',
        'individuals',
        'true_localizations',  # number of bounding box associations in dive
        'group_localizations',  # number of annotations marked 'group: localization'
    )
    with requests.get(
        url=f'{current_app.config.get("DARC_REVIEW_URL")}/vars-qaqc-checklist/{"&".join(sequences)}',
        headers=current_app.config.get('DARC_REVIEW_HEADERS'),
    ) as checklist_res:
        if checklist_res.status_code == 200:
            checklist = checklist_res.json()
        else:
            print('ERROR: Unable to get QAQC checklist from external review server')
            checklist = {}
    # get counts
    # The config value is read here and passed in as an argument, so the worker
    # function does not need to touch current_app from inside a thread.
    vars_dive_url = current_app.config.get('VARS_DIVE_QUERY_URL')
    per_sequence_counts = []
    threads = []
    for sequence in sequences:
        # Each thread gets its own dict: `d[k] += n` is a read-modify-write, so
        # several threads updating one shared dict could lose increments.
        sequence_counts = dict.fromkeys(count_keys, 0)
        per_sequence_counts.append(sequence_counts)
        thread = threading.Thread(
            target=get_sequence_counts,
            args=(sequence, sequence_counts, vars_dive_url),
        )
        threads.append(thread)
        thread.start()
    for thread in threads:
        thread.join()
    # All workers have finished, so the per-sequence tallies can be summed safely.
    total_counts = {
        key: sum(counts[key] for counts in per_sequence_counts)
        for key in count_keys
    }
    return render_template(
        'qaqc/vars/qaqc-checklist.html',
        annotation_count=total_counts['annotations'],
        individual_count=total_counts['individuals'],
        true_localization_count=total_counts['true_localizations'],
        group_localization_count=total_counts['group_localizations'],
        checklist=checklist,
        # Single sequence: its name. Several: "<first name> - <last word of the last name>".
        tab_title=sequences[0] if len(sequences) == 1 else f'{sequences[0]} - {sequences[-1].split(" ")[-1]}'
    )
60def get_sequence_counts(sequence_name, total_counts, vars_dive_url):
61 identity_references = set()
62 sequence_annotations = 0
63 sequence_individuals = 0
64 sequence_true_localizations = 0
65 sequence_group_localizations = 0
66 res = requests.get(f'{vars_dive_url}/{sequence_name.replace(" ", "%20")}')
67 if res.status_code != 200:
68 print(f'{TERM_RED}Failed to fetch annotations for sequence {sequence_name}{TERM_NORMAL}')
69 annotations = res.json()['annotations']
70 sequence_annotations += len(annotations)
71 for annotation in annotations:
72 if len(annotation['concept']) == 0 or annotation['concept'][0].islower():
73 # ignore non-taxonomic concepts
74 continue
75 if annotation.get('group') == 'localization':
76 sequence_true_localizations += 1
77 sequence_group_localizations += 1
78 continue
79 id_ref = None
80 cat_abundance = None
81 pop_quantity = None
82 for association in annotation['associations']:
83 if association['link_name'] == 'identity-reference':
84 id_ref = association['link_value']
85 elif association['link_name'] == 'categorical-abundance':
86 cat_abundance = association['link_value']
87 elif association['link_name'] == 'population-quantity':
88 pop_quantity = association['link_value']
89 elif association['link_name'] == 'bounding box':
90 sequence_true_localizations += 1
91 if id_ref:
92 if id_ref in identity_references:
93 continue
94 else:
95 identity_references.add(id_ref)
96 if cat_abundance:
97 match cat_abundance:
98 case '11-20':
99 sequence_individuals += 15
100 case '21-50':
101 sequence_individuals += 35
102 case '51-100':
103 sequence_individuals += 75
104 case '\u003e100':
105 sequence_individuals += 100
106 continue
107 if pop_quantity and pop_quantity != '':
108 sequence_individuals += int(pop_quantity)
109 continue
110 sequence_individuals += 1
111 total_counts['annotations'] += sequence_annotations
112 total_counts['individuals'] += sequence_individuals
113 total_counts['true_localizations'] += sequence_true_localizations
114 total_counts['group_localizations'] += sequence_group_localizations
# update vars qaqc checklist
@vars_qaqc_bp.patch('/checklist')
def patch_vars_qaqc_checklist():
    """
    Forward a checklist update to the external review server.

    The JSON body must contain a truthy ``sequences`` value, which is placed in
    the upstream URL path; the remaining fields are sent on as the PATCH
    payload.

    Returns:
        ``({}, 400)`` when ``sequences`` is missing or empty, otherwise the
        review server's JSON body together with its status code.
    """
    payload = request.json
    if not payload.get('sequences'):
        return {}, 400
    # "sequences" identifies the checklist in the URL and is not part of the forwarded body
    sequence_key = payload.pop('sequences')
    config = current_app.config
    upstream = requests.patch(
        url=f'{config.get("DARC_REVIEW_URL")}/vars-qaqc-checklist/{sequence_key}',
        headers=config.get('DARC_REVIEW_HEADERS'),
        json=payload,
    )
    return upstream.json(), upstream.status_code
133# individual qaqc checks (VARS)
134@vars_qaqc_bp.get('/check/<check>')
135def vars_qaqc(check):
136 sequences = request.args.getlist('sequence')
137 qaqc_annos = VarsQaqcProcessor(
138 sequence_names=sequences,
139 vars_dive_url=current_app.config.get('VARS_DIVE_QUERY_URL'),
140 vars_phylogeny_url=current_app.config.get('VARS_PHYLOGENY_URL'),
141 )
142 tab_title = sequences[0] if len(sequences) == 1 else f'{sequences[0]} - {sequences[-1].split(" ")[-1]}'
143 data = {
144 'concepts': session.get('vars_concepts', []),
145 'title': check.replace('-', ' ').title(),
146 'tab_title': f'{tab_title} {check.replace("-", " ").title()}',
147 }
148 match check:
149 case 'multiple-associations':
150 qaqc_annos.find_duplicate_associations()
151 data['page_title'] = 'Records with multiples of the same association other than s2'
152 case 'missing-primary-substrate':
153 qaqc_annos.find_missing_s1()
154 data['page_title'] = 'Records missing primary substrate'
155 case 'identical-s1-&-s2':
156 qaqc_annos.find_identical_s1_s2()
157 data['page_title'] = 'Records with identical primary and secondary substrates'
158 case 'duplicate-s2':
159 qaqc_annos.find_duplicate_s2()
160 data['page_title'] = 'Records with with duplicate secondary substrates'
161 case 'missing-upon-substrate':
162 qaqc_annos.find_missing_upon_substrate()
163 data['page_title'] = 'Records missing a substrate that it is recorded "upon"'
164 case 'mismatched-substrates':
165 qaqc_annos.find_mismatched_substrates()
166 data['page_title'] = 'Records occurring at the same timestamp with mismatched substrates'
167 case 'missing-upon':
168 qaqc_annos.find_missing_upon()
169 data['page_title'] = 'Records other than "none" missing "upon"'
170 case 'missing-ancillary-data':
171 qaqc_annos.find_missing_ancillary_data()
172 data['page_title'] = 'Records missing ancillary data'
173 case 'id-ref-concept-name':
174 qaqc_annos.find_id_refs_different_concept_name()
175 data['page_title'] = 'Records with the same ID reference that have different concept names'
176 case 'id-ref-associations':
177 qaqc_annos.find_id_refs_conflicting_associations()
178 data['page_title'] = 'Records with the same ID reference that have conflicting associations'
179 case 'blank-associations':
180 qaqc_annos.find_blank_associations()
181 data['page_title'] = 'Records with blank association link values'
182 case 'suspicious-hosts':
183 qaqc_annos.find_suspicious_hosts()
184 data['page_title'] = 'Records with suspicious hosts'
185 case 'expected-associations':
186 qaqc_annos.find_missing_expected_association()
187 data['page_title'] = 'Records expected to be associated with an organism but "upon" is inanimate'
188 case 'host-associate-time-diff':
189 qaqc_annos.find_long_host_associate_time_diff()
190 data['page_title'] = 'Records where "upon" occurred more than one minute ago or cannot be found'
191 case 'localizations-missing-bounding-box':
192 qaqc_annos.find_localizations_without_bounding_boxes()
193 data['page_title'] = 'Records in the "localization" group that do not contain a "bounding box" association ' \
194 '(also displays records not in the "localization" group that contain a "bounding box" ' \
195 'association)'
196 case 'number-of-bounding-boxes':
197 qaqc_annos.find_num_bounding_boxes()
198 data['page_title'] = 'Number of bounding boxes for each unique concept'
199 case 'unique-fields':
200 qaqc_annos.find_unique_fields()
201 data['unique_list'] = qaqc_annos.final_records
202 return render_template('qaqc/vars/qaqc-unique.html', data=data)
203 data['annotations'] = qaqc_annos.final_records
204 return render_template('qaqc/vars/qaqc.html', data=data)
@vars_qaqc_bp.get('/quick-check/<check>')
def qaqc_quick(check):
    """
    Return a quick numeric summary for a named check as JSON.

    Args:
        check: the check name taken from the URL. Only
            ``missing-ancillary-data`` is handled.

    Query parameters:
        sequence: one or more sequence names (repeat the parameter for several).

    Returns:
        ``({'num_records': <count>}, 200)`` for ``missing-ancillary-data``,
        otherwise the rendered 404 page with status 404.
    """
    config = current_app.config
    # The processor is built before the check name is examined, as in the
    # original ordering.
    processor = VarsQaqcProcessor(
        sequence_names=request.args.getlist('sequence'),
        vars_dive_url=config.get('VARS_DIVE_QUERY_URL'),
        vars_phylogeny_url=config.get('VARS_PHYLOGENY_URL'),
    )
    if check == 'missing-ancillary-data':
        missing_count = processor.get_num_records_missing_ancillary_data()
        return {'num_records': missing_count}, 200
    return render_template('errors/404.html', err=''), 404