Coverage for util/constants.py: 100%
36 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-12 17:57 +0000
"""
This file holds basic data that is not changed each time format_and_output.py is run.
"""

# DSCRTP accepted null vals
NULL_VAL_INT = -999  # null value used for integer fields
NULL_VAL_STRING = 'NA'  # null value used for string fields
# Column nums for after the json object is array-ized.
# Each constant is the zero-based position of one column in a row.
# THESE MUST BE UPDATED WHENEVER COLUMNS SHIFT (see the column nums in HEADERS below)
SAMPLE_ID = 0
TRACKING_ID = 1
SCIENTIFIC_NAME = 4
COMBINED_NAME_ID = 23
ID_COMMENTS = 25
OBSERVATION_DATE = 42
OBSERVATION_TIME = 43
INDV_COUNT = 57
VERBATIM_SIZE = 61
MIN_SIZE = 62
MAX_SIZE = 63
ASSOCIATED_TAXA = 66
OCCURRENCE_COMMENTS = 67
HABITAT = 71
SUBSTRATE = 72
CMECS_GEO_FORM = 73
IMAGE_PATH = 78
HIGHLIGHT_IMAGE = 79
BOUNDING_BOX_ID = 88
IDENTITY_REF = 89
UPON_IS_CREATURE = 90
VARS_CONCEPT_NAME = 91
# Column headers for the output file, in output order.
# The trailing number on each line is that header's zero-based index in this list.
HEADERS = [
    'SampleID',  # 0
    'TrackingID',  # 1
    'Citation',  # 2
    'Repository',  # 3
    'ScientificName',  # 4
    'VernacularNameCategory',  # 5
    'VernacularName',  # 6
    'TaxonRank',  # 7
    'AphiaID',  # 8
    'LifeScienceIdentifier',  # 9
    'Phylum',  # 10
    'Class',  # 11
    'Subclass',  # 12
    'Order',  # 13
    'Suborder',  # 14
    'Family',  # 15
    'Subfamily',  # 16
    'Genus',  # 17
    'Subgenus',  # 18
    'Species',  # 19
    'Subspecies',  # 20
    'ScientificNameAuthorship',  # 21
    'Morphospecies',  # 22
    'CombinedNameID',  # 23
    'Synonyms',  # 24
    'IdentificationComments',  # 25
    'IdentifiedBy',  # 26
    'IdentificationDate',  # 27
    'IdentificationQualifier',  # 28
    'IdentificationVerificationStatus',  # 29
    'Ocean',  # 30
    'LargeMarineEcosystem',  # 31
    'Country',  # 32
    'FishCouncilRegion',  # 33
    'Locality',  # 34
    'Latitude',  # 35
    'Longitude',  # 36
    'DepthInMeters',  # 37
    'DepthMethod',  # 38
    'MinimumDepthInMeters',  # 39
    'MaximumDepthInMeters',  # 40
    'LocationComments',  # 41
    'ObservationDate',  # 42
    'ObservationTime',  # 43
    'SurveyID',  # 44
    'Vessel',  # 45
    'PI',  # 46
    'PIAffiliation',  # 47
    'Purpose',  # 48
    'SurveyComments',  # 49
    'Station',  # 50
    'EventID',  # 51
    'SamplingEquipment',  # 52
    'VehicleName',  # 53
    'SampleAreaInSquareMeters',  # 54 hardcoded for now, keeping column in case of future update (at request of DARC)
    'footprintWKT',  # 55 same ^
    'footprintSRS',  # 56 same ^
    'IndividualCount',  # 57
    'CategoricalAbundance',  # 58
    'Density',  # 59
    'Cover',  # 60
    'VerbatimSize',  # 61
    'MinimumSize',  # 62
    'MaximumSize',  # 63
    'WeightInKg',  # 64
    'Condition',  # 65
    'AssociatedTaxa',  # 66
    'OccurrenceComments',  # 67
    'LocationAccuracy',  # 68
    'NavType',  # 69
    'OtherData',  # 70
    'Habitat',  # 71
    'Substrate',  # 72
    'CMECSGeoForm',  # 73
    'Temperature',  # 74
    'Salinity',  # 75
    'Oxygen',  # 76
    'RecordType',  # 77
    'ImageFilePath',  # 78
    'HighlightImageFilePath',  # 79
    'DataProvider',  # 80
    'DataContact',  # 81
    'Modified',  # 82
    'WebSite',  # 83
    'EntryDate',  # 84
    'Reporter',  # 85
    'ReporterEmail',  # 86
    'ReporterComments',  # 87
    'BoundingBoxID',  # 88
    'IdentityReference',  # 89 - All columns here and below are for reference and are not output to the final .tsv
    'UponIsCreature',  # 90
    'VARSConceptName',  # 91
]
# Substrate code translations and grain size order.
# Maps each shorthand code to its translated text. Every value is a string
# except 'pi', which maps to a list of two alternative phrasings.
# NOTE: ' org' (with a leading space) is a separate key from 'org'; both are
# kept exactly as they were.
SUB_CONCEPTS = {
    'sed': 'sediment',
    'peb': 'pebble',
    'cob': 'cobble',
    'bou': 'boulder',
    'bed': 'bedrock',
    'chi': 'chimney',
    'man': 'man-made',
    'dead': 'dead',
    'dik': 'dike rock formation of',
    'c': 'cemented',
    'b': 'basalt',
    'l': 'limestone',
    'fl': 'fluted',
    'blk': 'block',
    'nodmn': 'manganese nodules',
    'orgcn': 'Cnidaria',
    'orgal': 'algal organism',
    'orgrho': 'Rhodophyta',
    'rov': 'remotely operated underwater vehicle',
    'ven': 'vent',
    'mn': 'with manganese crust',
    'pi': ['pillow lava formation of', 'from pillow lava'],
    'a': 'composed of algal carbonate',
    't': 'talus',
    'po': 'pocket',
    'hp': 'of hydrothermal precipitate',
    'led': 'ledge',
    'cre': 'crevice',
    'cha': 'channel',
    'cav': 'cavity',
    'cra': 'crack',
    'bu': 'burrow',
    'mo': 'mound',
    'ho': 'hollow',
    'tr': 'track',
    'sc': 'sediment-covered',
    'tu': 'tube formation of',
    'mu': 'mudstone',
    'm': 'mudstone',
    'du': 'dunes',
    'ri': 'rippled',
    'col': 'columnar',
    'cn': 'Cnidaria',
    'spo': 'Porifera',
    'org': 'organism',
    ' org': 'organism',
    'orgsp': 'Porifera',
    'bi': 'organism',
    'art': 'artificial reef',
    'cem': 'cement',
    'fib': 'fiber object',
    'met': 'metallic object',
    'tra': 'trash',
    'ord': 'ordnance',
    'made': 'object',
    'wre': 'wreck',
    'pla': 'plastic object',
    'tube': 'Animal-made tube',
    'mid': 'midwater',
}
# VARS shorthand for substrates
ROOTS = [
    'sed', 'nodmn', 'peb', 'cob', 'bou', 'blk', 'bed', 'chi', 'orgcn', 'orgal',
    'orgrho', 'orgsp', 'dead', 'man', 'rov', 'ven', 'org', 'bi', 'tube', 'mid',
]
# VARS substrates that don't have a shorthand
SAMES = [
    'organism', 'man-made trash', 'Animal-made tube', 'debris',
    'sediment', 'pebble', 'cobble', 'boulder', 'bedrock', 'midwater', 'surface', 'chimney',
]
# Suffixes and prefixes are additional descriptors added to a root. For example, 'scbedmn' consists of a prefix ('sc'),
# a root ('bed'), and a suffix ('mn'). These will be added together and the final translation will be
# 'sediment-covered bedrock with manganese crust'.
SUFFIXES = ['mn', 'pi', 'a', 'b', 't', 'po', 'hp']
SUFFIXES_FORMS = ['led', 'cre', 'cha', 'cav', 'cra', 'bu', 'mo', 'ho', 'tr', 'du']
PREFIXES = ['dik', 'fl', 'sc', 'tu', 'mu', 'ri', 'col', 'c', 'l', 'm']
SUFFIXES_DEAD = ['cn', 'spo', ' org', 'bi']  # ' org' keeps its leading space, as in the original
SUFFIXES_MAN = ['art', 'cem', 'fib', 'met', 'tra', 'ord', 'made', 'wre', 'pla']

# Every affix in one list, longest first. The sort is stable, so affixes of
# equal length stay in concatenation order.
# NOTE(review): presumably longest-first so that a long code is tried before a
# shorter code it contains - confirm against the code that consumes this list.
ALL_AFFIXES = sorted(
    SUFFIXES + SUFFIXES_FORMS + PREFIXES + SUFFIXES_DEAD + SUFFIXES_MAN,
    key=len,
    reverse=True,
)