Coverage for application / tator / tator_base_qaqc_processor.py: 14%

136 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-23 05:22 +0000

1from abc import abstractmethod, ABC 

2 

3import sys 

4import tator 

5 

6from pptx import Presentation 

7 

8from application.tator.tator_localization_processor import TatorLocalizationProcessor 

9 

10 

11class TatorBaseQaqcProcessor(TatorLocalizationProcessor, ABC): 

12 """ 

13 Fetches annotation information from Tator given a project id, section id, and list of deployments. 

14 Filters and formats the annotations for the various QA/QC checks. 

15 """ 

def __init__(
    self,
    project_id: int,
    section_ids: list[str],
    api: tator.api,
    tator_url: str,
    darc_review_url: str = None,
    transect_media_ids: list[int] = None,
):
    """
    Builds the QA/QC processor by handing every argument, unchanged, to the
    TatorLocalizationProcessor constructor. No extra state is set up here.

    :param project_id: id of the Tator project
    :param section_ids: ids of the Tator sections to work on
    :param api: Tator API handle (presumably what tator.get_api returns; confirm with caller)
    :param tator_url: base URL of the Tator instance
    :param darc_review_url: optional review URL, passed straight through to the parent
    :param transect_media_ids: optional media ids, passed straight through to the parent
    """
    parent_kwargs = {
        'project_id': project_id,
        'section_ids': section_ids,
        'api': api,
        'tator_url': tator_url,
        'darc_review_url': darc_review_url,
        'transect_media_ids': transect_media_ids,
    }
    super().__init__(**parent_kwargs)

33 

def check_names_accepted(self):
    """
    Finds records with a scientific name or tentative ID that is not accepted in WoRMS.

    A name counts as accepted when it is already a key of self.phylogeny.data or, failing
    that, when self.phylogeny.fetch_worms(name) returns a truthy value. Every name is looked
    up at most once per call.

    Each section's localizations are replaced with only the flagged records. A flagged record
    gets a 'problems' string listing every offending field: 'Scientific Name', 'Tentative ID',
    or 'Scientific Name, Tentative ID'. Finally process_records is called with the set of
    names that failed the lookup so they are not fetched again.
    """
    print('Checking for accepted names...', flush=True)
    checked = {}  # name -> True if accepted, False if not (lookup cache for this call)

    def is_accepted(name):
        """Returns the cached verdict for a name, doing the lookup on first sight."""
        if name not in checked:
            # short-circuit: only call fetch_worms when the name is not in the local data
            checked[name] = name in self.phylogeny.data or bool(self.phylogeny.fetch_worms(name))
        return checked[name]

    for section in self.sections:
        records_of_interest = []
        for localization in section.localizations:
            attributes = localization['attributes']
            scientific_name = attributes.get('Scientific Name')
            tentative_id = attributes.get('Tentative ID')
            # collect problems locally so the label never depends on a stale 'problems' key
            # and a bad tentative ID never overwrites a bad scientific name
            problems = []
            if not is_accepted(scientific_name):
                problems.append('Scientific Name')
            if tentative_id and not is_accepted(tentative_id):
                problems.append('Tentative ID')
            if problems:
                localization['problems'] = ', '.join(problems)
                records_of_interest.append(localization)
        print(f'Found {len(records_of_interest)} localizations with unaccepted names from {section.deployment_name}!')
        section.localizations = records_of_interest
    # don't try to fetch again for names we already know are unaccepted
    unaccepted_names = {name for name, accepted in checked.items() if not accepted}
    self.process_records(no_match_records=unaccepted_names)

79 

def check_missing_qualifier(self):
    """
    Flags records that are classified higher than species but have no qualifier set
    (a missing qualifier or the placeholder '--').

    process_records is called first so that self.final_records exists; final_records is then
    narrowed to the flagged records, each tagged with 'Scientific Name, Qualifier'.
    """
    self.process_records()
    flagged = []
    for record in self.final_records:
        if record.get('species'):
            continue  # identified to species level, no qualifier needed
        if record.get('qualifier', '--') != '--':
            continue  # a qualifier is already set
        record['problems'] = 'Scientific Name, Qualifier'
        flagged.append(record)
    self.final_records = flagged

92 

def check_stet_reason(self):
    """
    Flags records whose qualifier is 'stet.' but whose reason is missing or still '--'.

    Each section keeps only the flagged localizations (tagged 'Qualifier, Reason'), then
    process_records is run on the result.
    """
    for section in self.sections:
        flagged = []
        for localization in section.localizations:
            attributes = localization['attributes']
            if attributes.get('Qualifier') != 'stet.':
                continue
            if attributes.get('Reason', '--') != '--':
                continue  # a reason was given
            localization['problems'] = 'Qualifier, Reason'
            flagged.append(localization)
        section.localizations = flagged
    self.process_records()

106 

def get_all_tentative_ids_and_morphospecies(self):
    """
    Finds every record with a tentative ID or morphospecies. Also checks whether or not the
    tentative ID is in the same phylogenetic group as the scientific name.

    Step 1: each section keeps only localizations with a tentative ID and/or morphospecies
    (values '--', '-' and '' count as unset); 'problems' is set to 'Tentative ID',
    'Morphospecies' or 'Tentative ID Morphospecies'.
    Step 2: process_records is run, then for every final record that has a tentative ID the
    scientific name must appear among the values of self.phylogeny.data[tentative_id];
    otherwise ' phylogeny no match' is appended to 'problems'. Records without a tentative ID
    (morphospecies only) are skipped in this step. The phylogeny is saved at the end.
    """
    unset_values = ('--', '-', '')
    for section in self.sections:
        records_of_interest = []
        for localization in section.localizations:
            attributes = localization['attributes']
            tentative_id = attributes.get('Tentative ID')
            morphospecies = attributes.get('Morphospecies')
            problems = []
            if tentative_id and tentative_id not in unset_values:
                problems.append('Tentative ID')
            if morphospecies and morphospecies not in unset_values:
                problems.append('Morphospecies')
            if problems:
                # join instead of '+=' so a morphospecies-only label has no leading space
                localization['problems'] = ' '.join(problems)
                records_of_interest.append(localization)
        section.localizations = records_of_interest
    self.process_records()  # process first to make sure phylogeny is populated
    no_match_records = set()  # tentative IDs that fetch_worms already failed to find
    for record in self.final_records:
        tentative_id = record.get('tentative_id')
        if not tentative_id or tentative_id in unset_values:
            continue  # morphospecies-only record: there is no tentative ID to compare
        if tentative_id not in self.phylogeny.data:
            # only call fetch_worms for names that have not already failed
            if tentative_id in no_match_records or not self.phylogeny.fetch_worms(tentative_id):
                no_match_records.add(tentative_id)
                record['problems'] += ' phylogeny no match'
                continue
        # assumes a successful fetch_worms stores the name in phylogeny.data - TODO confirm
        if record['scientific_name'] not in self.phylogeny.data[tentative_id].values():
            record['problems'] += ' phylogeny no match'
    self.phylogeny.save()

149 

def get_all_notes_and_remarks(self):
    """
    Keeps only the localizations that carry a note and/or an identification remark.

    Values '--', '-' and '' count as empty. Each kept localization gets 'problems' set to
    'Notes', 'ID Remarks' or 'Notes, ID Remarks'; process_records is run afterwards.
    """
    # (label written to 'problems', attribute key it is read from)
    labelled_fields = (('Notes', 'Notes'), ('ID Remarks', 'IdentificationRemarks'))
    for section in self.sections:
        kept = []
        for localization in section.localizations:
            attributes = localization['attributes']
            labels = []
            for label, attribute_key in labelled_fields:
                value = attributes.get(attribute_key)
                if value and value not in ['--', '-', '']:
                    labels.append(label)
            if labels:
                localization['problems'] = ', '.join(labels)
                kept.append(localization)
        section.localizations = kept
    self.process_records()

172 

def get_re_examined(self):
    """
    Keeps only the localizations whose 'Reason' attribute is exactly 'To be re-examined',
    then runs process_records on what is left.
    """
    for section in self.sections:
        section.localizations = [
            localization
            for localization in section.localizations
            if localization['attributes'].get('Reason') == 'To be re-examined'
        ]
    self.process_records()

184 

@abstractmethod
def get_unique_taxa(self):
    """
    Abstract hook with no behavior of its own: subclasses find every unique combination of
    scientific name, tentative ID, and morphospecies, along with its box/dot info.
    """

191 

@abstractmethod
def get_summary(self):
    """
    Abstract hook with no behavior of its own: subclasses return a summary of the final
    records.
    """

198 

@abstractmethod
def download_image_guide(self, app) -> Presentation:
    """
    Abstract hook with no behavior of its own: subclasses find all records marked as "good"
    images and save them to a ppt.

    :param app: supplied by the caller; its use is left to the subclass
    """