Coverage for util/constants.py: 100%

36 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-12 17:57 +0000

1""" 

2This file holds constant data that does not change between runs of format_and_output.py. 

3""" 

4 

5# DSCRTP accepted null vals 

6NULL_VAL_INT = -999 

7NULL_VAL_STRING = 'NA' 

8 

9# Column numbers to use after the JSON object has been converted to an array 

10# THESE MUST BE UPDATED WHENEVER COLUMNS SHIFT (see the column numbers in HEADERS below) 

11SAMPLE_ID = 0 

12TRACKING_ID = 1 

13SCIENTIFIC_NAME = 4 

14COMBINED_NAME_ID = 23 

15ID_COMMENTS = 25 

16OBSERVATION_DATE = 42 

17OBSERVATION_TIME = 43 

18INDV_COUNT = 57 

19VERBATIM_SIZE = 61 

20MIN_SIZE = 62 

21MAX_SIZE = 63 

22ASSOCIATED_TAXA = 66 

23OCCURRENCE_COMMENTS = 67 

24HABITAT = 71 

25SUBSTRATE = 72 

26CMECS_GEO_FORM = 73 

27IMAGE_PATH = 78 

28HIGHLIGHT_IMAGE = 79 

29BOUNDING_BOX_ID = 88 

30IDENTITY_REF = 89 

31UPON_IS_CREATURE = 90 

32VARS_CONCEPT_NAME = 91 

33 

34# column headers for output file 

35HEADERS = [ 

36 'SampleID', # 0 

37 'TrackingID', # 1 

38 'Citation', # 2 

39 'Repository', # 3 

40 'ScientificName', # 4 

41 'VernacularNameCategory', # 5 

42 'VernacularName', # 6 

43 'TaxonRank', # 7 

44 'AphiaID', # 8 

45 'LifeScienceIdentifier', # 9 

46 'Phylum', # 10 

47 'Class', # 11 

48 'Subclass', # 12 

49 'Order', # 13 

50 'Suborder', # 14 

51 'Family', # 15 

52 'Subfamily', # 16 

53 'Genus', # 17 

54 'Subgenus', # 18 

55 'Species', # 19 

56 'Subspecies', # 20 

57 'ScientificNameAuthorship', # 21 

58 'Morphospecies', # 22 

59 'CombinedNameID', # 23 

60 'Synonyms', # 24 

61 'IdentificationComments', # 25 

62 'IdentifiedBy', # 26 

63 'IdentificationDate', # 27 

64 'IdentificationQualifier', # 28 

65 'IdentificationVerificationStatus', # 29 

66 'Ocean', # 30 

67 'LargeMarineEcosystem', # 31 

68 'Country', # 32 

69 'FishCouncilRegion', # 33 

70 'Locality', # 34 

71 'Latitude', # 35 

72 'Longitude', # 36 

73 'DepthInMeters', # 37 

74 'DepthMethod', # 38 

75 'MinimumDepthInMeters', # 39 

76 'MaximumDepthInMeters', # 40 

77 'LocationComments', # 41 

78 'ObservationDate', # 42 

79 'ObservationTime', # 43 

80 'SurveyID', # 44 

81 'Vessel', # 45 

82 'PI', # 46 

83 'PIAffiliation', # 47 

84 'Purpose', # 48 

85 'SurveyComments', # 49 

86 'Station', # 50 

87 'EventID', # 51 

88 'SamplingEquipment', # 52 

89 'VehicleName', # 53 

90 'SampleAreaInSquareMeters', # 54 hardcoded for now, keeping column in case of future update (at request of DARC) 

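91 'footprintWKT', # 55 hardcoded for now, kept for the same reason as column 54 

92 'footprintSRS', # 56 hardcoded for now, kept for the same reason as column 54 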

93 'IndividualCount', # 57 

94 'CategoricalAbundance', # 58 

95 'Density', # 59 

96 'Cover', # 60 

97 'VerbatimSize', # 61 

98 'MinimumSize', # 62 

99 'MaximumSize', # 63 

100 'WeightInKg', # 64 

101 'Condition', # 65 

102 'AssociatedTaxa', # 66 

103 'OccurrenceComments', # 67 

104 'LocationAccuracy', # 68 

105 'NavType', # 69 

106 'OtherData', # 70 

107 'Habitat', # 71 

108 'Substrate', # 72 

109 'CMECSGeoForm', # 73 

110 'Temperature', # 74 

111 'Salinity', # 75 

112 'Oxygen', # 76 

113 'RecordType', # 77 

114 'ImageFilePath', # 78 

115 'HighlightImageFilePath', # 79 

116 'DataProvider', # 80 

117 'DataContact', # 81 

118 'Modified', # 82 

119 'WebSite', # 83 

120 'EntryDate', # 84 

121 'Reporter', # 85 

122 'ReporterEmail', # 86 

123 'ReporterComments', # 87 

124 'BoundingBoxID', # 88 

125 'IdentityReference', # 89 - All columns here and below are for reference and are not output to the final .tsv 

126 'UponIsCreature', # 90 

127 'VARSConceptName' # 91 

128] 

129 

130# Substrate code translations and grain size order 

131SUB_CONCEPTS = { 

132 'sed': 'sediment', 

133 'peb': 'pebble', 

134 'cob': 'cobble', 

135 'bou': 'boulder', 

136 'bed': 'bedrock', 

137 'chi': 'chimney', 

138 'man': 'man-made', 

139 'dead': 'dead', 

140 'dik': 'dike rock formation of', 

141 'c': 'cemented', 

142 'b': 'basalt', 

143 'l': 'limestone', 

144 'fl': 'fluted', 

145 'blk': 'block', 

146 'nodmn': 'manganese nodules', 

147 'orgcn': 'Cnidaria', 

148 'orgal': 'algal organism', 

149 'orgrho': 'Rhodophyta', 

150 'rov': 'remotely operated underwater vehicle', 

151 'ven': 'vent', 

152 'mn': 'with manganese crust', 

153 'pi': ['pillow lava formation of', 'from pillow lava'], 

154 'a': 'composed of algal carbonate', 

155 't': 'talus', 

156 'po': 'pocket', 

157 'hp': 'of hydrothermal precipitate', 

158 'led': 'ledge', 

159 'cre': 'crevice', 

160 'cha': 'channel', 

161 'cav': 'cavity', 

162 'cra': 'crack', 

163 'bu': 'burrow', 

164 'mo': 'mound', 

165 'ho': 'hollow', 

166 'tr': 'track', 

167 'sc': 'sediment-covered', 

168 'tu': 'tube formation of', 

169 'mu': 'mudstone', 

170 'm': 'mudstone', 

171 'du': 'dunes', 

172 'ri': 'rippled', 

173 'col': 'columnar', 

174 'cn': 'Cnidaria', 

175 'spo': 'Porifera', 

176 'org': 'organism', 

177 ' org': 'organism', 

178 'orgsp': 'Porifera', 

179 'bi': 'organism', 

180 'art': 'artificial reef', 

181 'cem': 'cement', 

182 'fib': 'fiber object', 

183 'met': 'metallic object', 

184 'tra': 'trash', 

185 'ord': 'ordnance', 

186 'made': 'object', 

187 'wre': 'wreck', 

188 'pla': 'plastic object', 

189 'tube': 'Animal-made tube', 

190 'mid': 'midwater' 

191} 

192 

193# VARS shorthand for substrates 

194ROOTS = ['sed', 'nodmn', 'peb', 'cob', 'bou', 'blk', 'bed', 'chi', 'orgcn', 'orgal', 

195 'orgrho', 'orgsp', 'dead', 'man', 'rov', 'ven', 'org', 'bi', 'tube', 'mid'] 

196 

197# VARS substrates that don't have a shorthand 

198SAMES = ['organism', 'man-made trash', 'Animal-made tube', 'debris', 

199 'sediment', 'pebble', 'cobble', 'boulder', 'bedrock', 'midwater', 'surface', 'chimney'] 

200 

201""" 

202Suffixes and prefixes are additional descriptors added to a root. For example, 'scbedmn' consists of a prefix ('sc'),  

203a root ('bed'), and a suffix ('mn'). These will be added together and the final translation will be  

204'sediment-covered bedrock with manganese crust'. 

205""" 

206SUFFIXES = ['mn', 'pi', 'a', 'b', 't', 'po', 'hp'] 

207SUFFIXES_FORMS = ['led', 'cre', 'cha', 'cav', 'cra', 'bu', 'mo', 'ho', 'tr', 'du'] 

208PREFIXES = ['dik', 'fl', 'sc', 'tu', 'mu', 'ri', 'col', 'c', 'l', 'm'] 

209SUFFIXES_DEAD = ['cn', 'spo', ' org', 'bi'] 

210SUFFIXES_MAN = ['art', 'cem', 'fib', 'met', 'tra', 'ord', 'made', 'wre', 'pla'] 

211 

212ALL_AFFIXES = SUFFIXES + SUFFIXES_FORMS + PREFIXES + SUFFIXES_DEAD + SUFFIXES_MAN 

213ALL_AFFIXES.sort(key=len, reverse=True)