Skip to content

Commit d837d75

Browse files
authored
Merge pull request #301 from justinkadi/main
Adding spatial raster and vector references
2 parents dd09763 + 46a44a5 commit d837d75

File tree

2 files changed

+330
-0
lines changed

2 files changed

+330
-0
lines changed
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
## Spatial raster data
2+
3+
### About
4+
5+
The spatial raster files that we typically work with are `.tif` files.
6+
7+
### Processing `spatialRaster` entities
8+
9+
This reference tutorial will assume that the `otherEntity` of the `.tif` file already has an attribute list. If it exists, you can copy it. Otherwise, you can add one through the web editor before beginning this process, or create one through R to add.
10+
11+
In addition to the usual metadata information we'll need (description, attributes, physical), we'll need some additional metadata to create a `spatialRaster` entity. In particular, we'll need to know the *coordinate reference system* of the file, *number of bands*, *cell size*, and other information that we can gather from exploring the TIF files.
12+
13+
```{r, eval=FALSE}
14+
library(sf)
15+
library(dataone)
16+
library(datapack)
17+
library(uuid)
18+
library(arcticdatautils)
19+
library(EML)
20+
21+
### Set up node and gather data package
22+
d1c <- dataone::D1Client("...", "urn:node:...") # Setting the Member Node
23+
resourceMapId <- "..." # Get data package PID (resource map ID)
24+
dp <- getDataPackage(d1c, identifier = resourceMapId, lazyLoad = TRUE, quiet = FALSE) # Gather data package
25+
26+
### Load in Metadata EML
27+
metadataId <- selectMember(dp, name="sysmeta@formatId", value="https://eml.ecoinformatics.org/eml-2.2.0") # Get metadata PID
28+
doc <- read_eml(getObject(d1c@mn, metadataId)) # Read in metadata EML file
29+
```
30+
31+
#### Downloading the files
32+
33+
We will first download the files into our `datateam` server. We can do this using the `download.file()` function.
34+
```{r, eval=FALSE}
35+
download.file(url1, destination1)
36+
```
37+
38+
#### Changing the format IDs
39+
40+
We will then change the format IDs within the `sysmeta` and in the entity itself. The following example code assumes that you only have geoTIFF files.
41+
42+
```{r, eval=FALSE}
43+
pids <- selectMember(dp, "sysmeta@fileName", ".tif")
44+
45+
for(i in 1:length(pids)){
46+
sysmeta <- getSystemMetadata(d1c@mn, pids[i])
47+
48+
sysmeta@formatId <- "image/geotiff"
49+
50+
updateSystemMetadata(d1c@mn, pids[i], sysmeta)
51+
}
52+
53+
for(i in 1:length(doc$dataset$otherEntity)){
54+
doc$dataset$otherEntity[[i]]$entityType <- "image/geotiff"
55+
}
56+
```
57+
58+
#### Getting raster metadata
59+
60+
We have defined this function that we haven't added into `arcticdatautils` yet, but we will share below.
61+
62+
```{r, eval=FALSE}
63+
get_raster_metadata <- function(path, coord_name = NULL, attributeList){
64+
65+
# define a raste object
66+
raster_obj <- raster::raster(path)
67+
#message(paste("Reading raster object with proj4string of ", raster::crs(raster_obj)@projargs))
68+
69+
# determine coordinates of raster
70+
if (is.null(coord_name)){
71+
coord_name <- raster::crs(raster_obj)@projargs
72+
}
73+
74+
raster_info <- list(entityName = basename(path),
75+
attributeList = attributeList,
76+
spatialReference = list(horizCoordSysName = coord_name),
77+
horizontalAccuracy = list(accuracyReport = "unknown"),
78+
verticalAccuracy = list(accuracyReport = "unknown"),
79+
cellSizeXDirection = raster::res(raster_obj)[1],
80+
cellSizeYDirection = raster::res(raster_obj)[2],
81+
numberOfBands = raster::nbands(raster_obj),
82+
rasterOrigin = "Upper Left",
83+
rows = dim(raster_obj)[1],
84+
columns = dim(raster_obj)[2],
85+
verticals = dim(raster_obj)[3],
86+
cellGeometry = "pixel")
87+
return(raster_info)
88+
}
89+
```
90+
91+
This function takes in a path to the geotiff file and creates a `spatialRaster` object to be added into the EML.
92+
93+
To use this function, we will need to know the coordinate reference system as well to add as an argument. We can do this using the `raster` library in R.
94+
95+
```{r, eval=FALSE}
96+
# Create object with path to folder containing all the tif files
97+
raster_folder <- "path/to/your/rasters"
98+
99+
# Create list of the file names with full file paths
100+
raster_names <- list.files(raster_folder, full.names = TRUE)
101+
102+
# Find datum of a TIF file
103+
raster::raster(raster_names[[1]])
104+
raster::crs(raster::raster(raster_names[[1]]))@projargs
105+
106+
# Loop through all of the files
107+
for(i in 1:length(raster_names)){
108+
print(raster::crs(raster::raster(raster_names[[i]]))@projargs)
109+
}
110+
111+
# An example output of this section can look like
112+
# datum WGS84
113+
# projection UTM
114+
# zone = 22
115+
# units = m
116+
```
117+
118+
119+
We can use `arcticdatautils::get_coord_list()` to see a table of coordinate reference systems that we can use in our EML document. We can look through this table by looking for `WGS_1984_UTM_Zone_22` in the `horizCoordSysDef` column. Then, we can look for the corresponding `geogCoordSys` and see our coordinate system is `GCS_WGS_1984`.
120+
121+
#### Creating the `spatialRaster` object
122+
123+
We will first create an empty list for spatial raster entities to live. Then, we'll use the function to populate this list.
124+
125+
```{r, eval=FALSE}
126+
spatialRaster <- vector("list", length(raster_names))
127+
128+
for (i in 1:length(raster_names)) {
129+
spatialRaster[[i]] <- get_raster_metadata(raster_names[i],
130+
coord_name = "GCS_WGS_1984",
131+
attributeList = doc$dataset$otherEntity[[i]]$attributeList)
132+
}
133+
```
134+
135+
Note that this code assumes that the `otherEntity` and `raster_names` have the TIF files in the same order when assigning the attribute list. If this is different, we will need to find a way to assign the correct attribute list.
136+
137+
#### Adding `spatialRaster` entity
138+
139+
Before we're able to validate the EML doc, we'll need to assign the `spatialRaster` entities and remove the corresponding `otherEntities`.
140+
141+
```{r, eval=FALSE}
142+
doc$dataset$spatialRaster <- spatialRaster
143+
144+
doc$dataset$otherEntity <- NULL
145+
146+
eml_validate(doc)
147+
```
148+
149+
Note that if there are other files in the data package's `otherEntity` section that are not TIFs, we don't want to set them all to `NULL`. In that case, we would only `NULL` the corresponding files.
150+
151+
Then, remember to set the physicals for the `spatialRaster` section.
152+
153+
```{r, eval=FALSE}
154+
for (i in seq_along(doc$dataset$spatialRaster)) {
155+
raster_name <- doc$dataset$spatialRaster[[i]]$entityName
156+
157+
raster_pid <- selectMember(dp, "sysmeta@fileName", raster_name)
158+
physical <- arcticdatautils::pid_to_eml_physical(d1c@mn, raster_pid)
159+
160+
doc$dataset$spatialRaster[[i]]$physical <- physical
161+
}
162+
163+
eml_validate(doc)
164+
```
165+
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
## Spatial vector data
2+
3+
### About
4+
5+
Spatial vector files are geospatial files that represent geographic features using points, lines, and polygons. Spatial vector files can include GeoJSON (.json), ESRI shapefiles (.shp), GeoPackage (.gpkg), GeoParquet (.parquet), Google Keyhole Markup Language (.kml, .kmz), etc.
6+
7+
### Processing `spatialVector` entities
8+
9+
In addition to the usual metadata information we'll need (description, attributes, physical), we'll need some additional metadata to create a `spatialVector` entity. In particular, we'll need to know the *geometry* and *coordinate reference system* of the file.
10+
11+
To do this, we can either get this information from the submitter directly or upload the vector file into QGIS (or another GIS software) to explore its metadata. Otherwise, it will take some extra sleuthing and file processing in R from our end. Here we'll go over some techniques to gather this information from vector files. Then, we'll show how to create a `spatialVector` entity within an EML doc.
12+
13+
We'll start by setting our node, reading in the data package, and gathering the PID of our geospatial file:
14+
15+
```{r, eval=FALSE}
16+
library(sf)
17+
library(dataone)
18+
library(datapack)
19+
library(uuid)
20+
library(arcticdatautils)
21+
library(EML)
22+
23+
### Set up node and gather data package
24+
d1c <- dataone::D1Client("...", "urn:node:...") # Setting the Member Node
25+
resourceMapId <- "..." # Get data package PID (resource map ID)
26+
dp <- getDataPackage(d1c, identifier = resourceMapId, lazyLoad = TRUE, quiet = FALSE) # Gather data package
27+
28+
### Load in Metadata EML
29+
metadataId <- selectMember(dp, name="sysmeta@formatId", value="https://eml.ecoinformatics.org/eml-2.2.0") # Get metadata PID
30+
doc <- read_eml(getObject(d1c@mn, metadataId)) # Read in metadata EML file
31+
32+
### Read in spatial vector file
33+
spatial_vector_pid <- selectMember(dp, "sysmeta@fileName", "exampleFile.zip")
34+
```
35+
36+
#### Reading in the vector file
37+
38+
We'll first need to read in the vector file to extract the necessary metadata.
39+
40+
##### ESRI shapefiles
41+
42+
To find information from ESRI shapefiles, we can first use a function `arcticdatautils::read_zip_shapefile()`.
43+
44+
```{r, eval=FALSE}
45+
shapefile <- arcticdatautils::read_zip_shapefile(d1c@mn, shp_pid)
46+
```
47+
48+
##### GeoJSON, GeoPackage, and Parquet files
49+
50+
For GeoJSON, GeoPackage, and Parquet files, we don't have an arcticdatautils function to read the file from the node, so you'll need to download the file locally. We can use the `sf` library to read in these vector files instead.
51+
52+
```{r, eval=FALSE}
53+
geojson_file <- sf::st_read("~/path/to/vectorFile.json")
54+
geopackage_file <- sf::st_read("~/path/to/vectorFile.gpkg")
55+
geoparquet_file <- sf::st_read("~/path/to/vectorFile.parquet")
56+
```
57+
58+
#### Exploring vector file for metadata
59+
60+
To find information from ESRI shapefiles, GeoJSONs, GeoPackages, and Parquet files, we can use the `sf` library again to find the *coordinate reference system* and *geometry*.
61+
62+
```{r, eval=FALSE}
63+
### Get coordinate reference system
64+
sf::st_crs(file)
65+
66+
### Find the geometry
67+
sf::st_geometry(file)
68+
```
69+
70+
To reference the names of the coordinate reference systems, we can use `arcticdatautils::get_coord_list()`.
71+
72+
##### Additional files
73+
74+
For `.kml` and `.kmz` files, or other vector files not mentioned, there may be other libraries in R that can be used to explore their metadata. Uploading the file into QGIS or another GIS software is another quick way to retrieve this metadata information.
75+
76+
#### Edit format ID
77+
78+
Next, we'll want to check the format ID and, if necessary, change the format ID to reflect the correct file type. If it needs to be changed to an ESRI shapefile, we'll do the following:
79+
80+
```{r, eval=FALSE}
81+
spatial_vector_pid <- selectMember(dp, "sysmeta@fileName", "exampleFile.zip")
82+
sysmeta <- dataone::getSystemMetadata(d1c@mn, spatial_vector_pid)
83+
sysmeta@formatId <- "application/vnd.shp+zip"
84+
85+
dataone::updateSystemMetadata(d1c@mn, spatial_vector_pid, sysmeta)
86+
```
87+
88+
You can check for format IDs in this [documentation](https://cn.dataone.org/cn/v2/formats).
89+
90+
#### Creating `spatialVector` entity
91+
92+
Next, we'll be creating our `spatialVector` entity. We can use an `arcticdatautils` function to do this. Then, we'll add it to the EML doc.
93+
94+
One thing we'll need for this entity is an attribute list. If one was already created from the web editor, you can copy that over. Otherwise, you can use R to create and add one for this file. The example code below will assume that we're copying the attribute list over from the `otherEntity` of an ESRI shapefile.
95+
96+
```{r, eval=FALSE}
97+
spatialVector <- arcticdatautils::pid_to_eml_entity(d1c@mn,
98+
spatial_vector_pid,
99+
entity_type = "spatialVector",
100+
entityName = "exampleFile.zip",
101+
entityDescription = "spatial vector description",
102+
attributeList = doc$dataset$otherEntity[[i]]$attributeList,
103+
geometry = "Polygon",
104+
spatialReference = "list(horizCoordSysName = GCS_North_American_1983"))
105+
106+
doc$dataset$spatialVector[[1]] <- spatialVector
107+
108+
doc$dataset$otherEntity[[i]] <- NULL # removing the previous otherEntity of the file
109+
```
110+
111+
Finally, we'll run `eml_validate(doc)` to make sure everything is fine.
112+
113+
### Example script
114+
115+
Here is an example script combining everything when processing an ESRI shapefile:
116+
117+
```{r, eval=FALSE}
118+
### Set up node and gather data package
119+
d1c <- dataone::D1Client("PROD", "urn:node:ARCTIC") # Setting the Member Node
120+
resourceMapId <- "..." # Get data package PID (resource map ID)
121+
dp <- getDataPackage(d1c, identifier = resourceMapId, lazyLoad = TRUE, quiet = FALSE) # Gather data package
122+
123+
### Load in Metadata EML
124+
metadataId <- selectMember(dp, name="sysmeta@formatId", value="https://eml.ecoinformatics.org/eml-2.2.0") # Get metadata PID
125+
doc <- read_eml(getObject(d1c@mn, metadataId)) # Read in metadata EML file
126+
127+
### Creating Spatial Vector
128+
129+
# read in shapefile
130+
shp_pid <- selectMember(dp, "sysmeta@fileName", "PeatTess.zip")
131+
shapefile <- arcticdatautils::read_zip_shapefile(d1c@mn, shp_pid)
132+
133+
# get coordinate system
134+
sf::st_crs(shapefile) # -> GCS_North_American_1927
135+
136+
# find geometry of shapefile
137+
sf::st_geometry(shapefile) # -> polygon
138+
139+
### Edit formatId
140+
141+
# Format ID
142+
vector_pid <- selectMember(dp, "sysmeta@fileName", "PeatTess.zip")
143+
sysmeta <- getSystemMetadata(d1c@mn, vector_pid)
144+
sysmeta@formatId <- "application/vnd.shp+zip"
145+
146+
updateSystemMetadata(d1c@mn, vector_pid, sysmeta)
147+
148+
### Create spatial vector entity
149+
spatialVector <- pid_to_eml_entity(d1c@mn,
150+
shp_pid,
151+
entity_type = "spatialVector",
152+
entityName = "PeatTess.zip",
153+
entityDescription = "1km tessellation of the Alaska peatland map",
154+
attributeList = doc$dataset$otherEntity$attributeList,
155+
geometry = "Polygon",
156+
spatialReference = list(horizCoordSysName = "GCS_North_American_1927"))
157+
158+
# add spatial vector to doc
159+
doc$dataset$spatialVector[[1]] <- spatialVector
160+
161+
# NULL the corresponding otherEntity
162+
doc$dataset$otherEntity <- NULL
163+
164+
eml_validate(doc)
165+
```

0 commit comments

Comments
 (0)