Coverage for application / tator / tator_base_qaqc_processor.py: 14%
136 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-23 05:22 +0000
1from abc import abstractmethod, ABC
3import sys
4import tator
6from pptx import Presentation
8from application.tator.tator_localization_processor import TatorLocalizationProcessor
class TatorBaseQaqcProcessor(TatorLocalizationProcessor, ABC):
    """
    Base class for the Tator QA/QC checks.

    Each check narrows ``section.localizations`` (for every section in ``self.sections``) down to the
    records of interest, tags them with a ``'problems'`` string, and then calls ``process_records()``
    to build ``self.final_records``.

    NOTE(review): ``self.sections``, ``self.phylogeny``, ``self.final_records`` and ``process_records()``
    are not defined in this class; they are presumably provided by ``TatorLocalizationProcessor``.
    """

    # Attribute values that mean "nothing was entered" for free-text / optional fields.
    _PLACEHOLDER_VALUES = ('--', '-', '')

    def __init__(
        self,
        project_id: int,
        section_ids: list[str],
        api: tator.api,
        tator_url: str,
        darc_review_url: str = None,
        transect_media_ids: list[int] = None,
    ):
        """
        Forwards every argument unchanged to the parent constructor.
        """
        super().__init__(
            project_id=project_id,
            section_ids=section_ids,
            api=api,
            darc_review_url=darc_review_url,
            tator_url=tator_url,
            transect_media_ids=transect_media_ids,
        )

    @staticmethod
    def _has_value(value) -> bool:
        """
        Returns True when an attribute value is set, i.e. it is truthy and not one of the placeholder strings.
        """
        return bool(value) and value not in TatorBaseQaqcProcessor._PLACEHOLDER_VALUES

    def _is_accepted_name(self, name, checked: dict) -> bool:
        """
        Returns whether `name` is accepted, memoising the answer in `checked`.

        A name counts as accepted when it is already in ``self.phylogeny.data`` or when
        ``self.phylogeny.fetch_worms(name)`` returns a truthy value. The fetch is only attempted once per name.
        """
        if name not in checked:
            checked[name] = name in self.phylogeny.data or bool(self.phylogeny.fetch_worms(name))
        return checked[name]

    def check_names_accepted(self):
        """
        Finds records with a scientific name or tentative ID that is not accepted in WoRMS.

        The 'problems' value of a flagged record is 'Scientific Name', 'Tentative ID', or
        'Scientific Name, Tentative ID'.
        """
        print('Checking for accepted names...', flush=True)
        checked = {}  # name -> True (accepted) / False (unaccepted), shared across all sections
        for section in self.sections:
            records_of_interest = []
            for localization in section.localizations:
                attributes = localization['attributes']
                scientific_name = attributes.get('Scientific Name')
                tentative_id = attributes.get('Tentative ID')
                # Build the problem list from scratch for this record so that one finding never overwrites
                # the other and a stale 'problems' value from an earlier check cannot leak into the result.
                problems = []
                if not self._is_accepted_name(scientific_name, checked):
                    problems.append('Scientific Name')
                if tentative_id and not self._is_accepted_name(tentative_id, checked):
                    problems.append('Tentative ID')
                if problems:
                    localization['problems'] = ', '.join(problems)
                    records_of_interest.append(localization)
            print(f'Found {len(records_of_interest)} localizations with unaccepted names from {section.deployment_name}!')
            section.localizations = records_of_interest
        # don't try to fetch again for names we already know are unaccepted
        self.process_records(no_match_records={name for name, accepted in checked.items() if not accepted})

    def check_missing_qualifier(self):
        """
        Finds records that are classified higher than species but don't have a qualifier set (usually '--').
        This check needs to call process_records first to populate phylogeny.
        """
        self.process_records()
        flagged_records = []
        for record in self.final_records:
            # No 'species' value means the record is identified above species level.
            if not record.get('species') and record.get('qualifier', '--') == '--':
                record['problems'] = 'Scientific Name, Qualifier'
                flagged_records.append(record)
        self.final_records = flagged_records

    def check_stet_reason(self):
        """
        Finds records that have a qualifier of 'stet.' but no reason set (reason missing or '--').
        """
        for section in self.sections:
            records_of_interest = []
            for localization in section.localizations:
                attributes = localization['attributes']
                if attributes.get('Qualifier') == 'stet.' and attributes.get('Reason', '--') == '--':
                    localization['problems'] = 'Qualifier, Reason'
                    records_of_interest.append(localization)
            section.localizations = records_of_interest
        self.process_records()

    def get_all_tentative_ids_and_morphospecies(self):
        """
        Finds every record with a tentative ID or morphospecies. For records that have a tentative ID, also
        checks whether the scientific name appears in the tentative ID's phylogeny; if it does not (or the
        tentative ID cannot be found at all), ' phylogeny no match' is appended to the record's problems.
        """
        for section in self.sections:
            records_of_interest = []
            for localization in section.localizations:
                attributes = localization['attributes']
                localization_problems = ''
                if self._has_value(attributes.get('Tentative ID')):
                    localization_problems += 'Tentative ID'
                if self._has_value(attributes.get('Morphospecies')):
                    localization_problems += ' Morphospecies'
                if localization_problems:
                    localization['problems'] = localization_problems
                    records_of_interest.append(localization)
            section.localizations = records_of_interest
        self.process_records()  # process first to make sure phylogeny is populated
        no_match_records = set()  # tentative IDs that WoRMS is already known not to have
        for record in self.final_records:
            tentative_id = record.get('tentative_id')
            if not self._has_value(tentative_id):
                # Morphospecies-only record: there is no tentative ID to compare against, so skip the
                # phylogeny check instead of looking up an empty name and reporting a bogus mismatch.
                continue
            if tentative_id not in self.phylogeny.data:
                if tentative_id in no_match_records or not self.phylogeny.fetch_worms(tentative_id):
                    no_match_records.add(tentative_id)
                    record['problems'] += ' phylogeny no match'
                    continue
            # NOTE(review): assumes a successful fetch_worms() adds tentative_id to phylogeny.data - confirm.
            if record['scientific_name'] not in self.phylogeny.data[tentative_id].values():
                record['problems'] += ' phylogeny no match'
        self.phylogeny.save()

    def get_all_notes_and_remarks(self):
        """
        Finds every record with a note or remark. The 'problems' value is 'Notes', 'ID Remarks', or
        'Notes, ID Remarks'.
        """
        for section in self.sections:
            records_of_interest = []
            for localization in section.localizations:
                attributes = localization['attributes']
                problems = []
                if self._has_value(attributes.get('Notes')):
                    problems.append('Notes')
                if self._has_value(attributes.get('IdentificationRemarks')):
                    problems.append('ID Remarks')
                if problems:
                    localization['problems'] = ', '.join(problems)
                    records_of_interest.append(localization)
            section.localizations = records_of_interest
        self.process_records()

    def get_re_examined(self):
        """
        Finds all records that have a reason of "To be re-examined".
        """
        for section in self.sections:
            section.localizations = [
                localization
                for localization in section.localizations
                if localization['attributes'].get('Reason') == 'To be re-examined'
            ]
        self.process_records()

    @abstractmethod
    def get_unique_taxa(self):
        """
        Finds every unique scientific name, tentative ID, and morphospecies combo and box/dot info.
        """
        pass

    @abstractmethod
    def get_summary(self):
        """
        Returns a summary of the final records.
        """
        pass

    @abstractmethod
    def download_image_guide(self, app) -> Presentation:
        """
        Finds all records marked as "good" images, saves them to a ppt.
        """
        pass