biosensor-dcp-helper/generate_metadata.R at master · csmolnar/biosensor-dcp-helper · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
################ Important inputs
## Plate names are read from the command line; pass one or more of them.
## Make sure the directory names contain no spaces!
## The pipeline assumes that the "inbox" directory contains one directory per
## plate whose name is exactly the plate name given here.
## Call this script with:
##   Rscript generate_metadata.R <plate-name> [<plate-name> ...]

args <- commandArgs(trailingOnly = TRUE)
plate_name <- args
# for debugging only
#plate_name <- "000012095203__2019-12-09T17_58_26-Measurement_1"

# paste0() turns a zero-length argument into "", so without a plate name every
# path built from `plate_name` would silently point at the inbox/metadata root.
# Fail early instead of processing the wrong directory.
if (length(plate_name) == 0) {
  stop(
    "No plate name supplied. ",
    "Usage: Rscript generate_metadata.R <plate-name> [<plate-name> ...]",
    call. = FALSE
  )
}
print(paste0("Processing plate ", plate_name))

################ Settings that occasionally need to be adjusted
## JSON job templates
# Earlier template locations, kept for reference:
# new_json_path_flat = "~/mcsaba/biosensor/src/dcp_helper/python/job_flatfield_template.json" #brightfield
# new_json_path_max = "~/mcsaba/biosensor/src/dcp_helper/python/job_maxproj_template.json" #fluorescence channels
# new_json_path_seg = "~/mcsaba/biosensor/src/dcp_helper/python/job_segmentation_template.json" #

# Flatfield template (brightfield)
new_json_path_flat <- "~/dcp_helper/python/job_flatfield_template.json"
# Maximum-projection template (fluorescence channels)
new_json_path_max <- "~/dcp_helper/python/job_maxproj_template.json"
# Segmentation template
new_json_path_seg <- "~/dcp_helper/python/job_segmentation_template.json"


## Channels: identifier -> short name.
## `channel_v` and `channel_n` are paired by position further down the script,
## so both are derived from one lookup to keep them aligned.
channels <- c(ch1 = "pc", ch2 = "bf", ch3 = "ce", ch4 = "tm")
channel_v <- names(channels)
channel_n <- unname(channels)

################ This is where the execution starts
################ Creating metadata
library(tidyverse)
library(dcphelper)
library(tictoc)

## Define paths
## Each *_path_base holds one entry per element of `plate_name`.

flatfield_dir <- "flatfield"

# Metadata output directory per plate (a relative "~" path is acceptable here)
new_path_base <- paste0("~/dcp_helper/metadata/", plate_name, "/")
# Image input directory per plate (absolute path with /home/ubuntu/ required)
inbox_path_base <- paste0("/home/ubuntu/bucket/inbox/", plate_name, "/Images/")
# Deprecated: only used for result collection
flatfield_path_base <- paste0("~/bucket/", flatfield_dir, "/", plate_name, "/")

## Creating target dir
# Do not execute this from a local machine if you expect other AWS services to
# access the directory later on.
lapply(new_path_base, dir.create, recursive = TRUE)

tic()
#### Creating metadata directories
# Builds the split metadata for the first channel (named "pc") of every plate,
# with the brightfield and additional projections switched on.
print("creating pc metadata")
for (j in seq_along(inbox_path_base)) {
  metadata_split_path <- create_flatfield_metadata_split(
    path = inbox_path_base[j],
    channel_of_interest = channel_v[1], # first channel, processed under the name "pc"
    name = "pc",
    # This used to be `new_json_path`, a variable that is never defined; it only
    # worked as long as the callee never evaluated the argument. Pass a defined
    # template path instead. NOTE(review): the argument is reportedly not
    # needed by this function - confirm which template it should be if it is.
    json_path = new_json_path_flat,
    path_base = new_path_base[j],
    flatfield_dir = flatfield_dir,
    force = FALSE,
    include_brightfield_proj = TRUE, # setting this value to FALSE will skip brightfield image analysis
    include_additional_proj = TRUE)
}
toc()

tic()
# Builds the split metadata for every channel except the first one, for every
# plate. seq_along(...)[-1] yields an empty sequence when only one channel is
# configured, whereas 2:length(channel_n) would count backwards (2, 1).
for (i in seq_along(channel_n)[-1]) {
  print(paste0("creating ", channel_n[i], " metadata"))
  for (j in seq_along(inbox_path_base)) {
    metadata_split_path <- create_flatfield_metadata_split(
      path = inbox_path_base[j],
      channel_of_interest = channel_v[i], # channel identifier paired with channel_n[i]
      name = channel_n[i],
      # This used to be `new_json_path`, a variable that is never defined; it
      # only worked as long as the callee never evaluated the argument. Pass a
      # defined template path instead. NOTE(review): the argument is reportedly
      # not needed by this function - confirm which template it should be if it is.
      json_path = new_json_path_flat,
      path_base = new_path_base[j],
      flatfield_dir = flatfield_dir,
      force = FALSE)
  }
}
toc()

################ Grouping data
# For every plate: let generate_group() produce the grouping command, echo it,
# and run it through the shell.

tic()
print("Creating shell script for grouping")
# seq_along() keeps the loop from running with indices 1 and 0 when
# `plate_name` is empty (which 1:length(plate_name) would do).
for (i in seq_along(plate_name)) {
  path <- generate_group(plate_name[i], channel_n, new_path_base[i])
  print(path)
  print("Grouping data using python script")
  group_status <- system(path)
  # system() returns the command's exit status; report failures instead of
  # dropping them silently. A warning (not an error) keeps the remaining plates
  # and steps running as before.
  if (group_status != 0) {
    warning(
      "Grouping command for plate ", plate_name[i],
      " exited with status ", group_status,
      call. = FALSE
    )
  }
}
toc()

################ Aggregating information and executable file
print("Aggregating information and executable file")

# Link the metadata CSV files of one channel in one plate directory to a JSON
# job template.
#
# path_base: metadata directory of a single plate.
# channel:   short channel name that must occur in the file name.
# json_path: JSON template handed on to link_json_metadata().
#
# Files are selected by their base name only, so a plate directory whose name
# happens to contain a channel name cannot pull in files of other channels, and
# ".csv" is matched literally rather than as a regular expression (where "."
# would match any character).
link_channel_metadata <- function(path_base, channel, json_path) {
  metadata_files <- list.files(path_base, pattern = "metadata_", full.names = TRUE)
  file_names <- basename(metadata_files)
  is_channel_csv <- stringr::str_detect(file_names, stringr::fixed(".csv")) &
    stringr::str_detect(file_names, stringr::fixed(channel))
  link_json_metadata(
    metadata_split_path = metadata_files[is_channel_csv],
    json_path = json_path,
    path_base = path_base
  )
}

# "pc" files use the segmentation template
tic()
for (j in new_path_base) {
  link_channel_metadata(j, "pc", new_json_path_seg)
}
toc()

# "bf" files use the flatfield template
tic()
for (j in new_path_base) {
  link_channel_metadata(j, "bf", new_json_path_flat)
}
toc()

# The third and fourth channels use the maximum-projection template
tic()
channel_n_mod <- channel_n[3:4]
for (j in new_path_base) {
  for (channel in channel_n_mod) {
    link_channel_metadata(j, channel, new_json_path_max)
  }
}
toc()

### Grouping final job files
# Calls group_jobs_bash() once per plate directory and channel name.
# NOTE(review): rows 1-16 x columns 1-24 presumably describe a 384-well plate
# layout - confirm against group_jobs_bash().

tic()
for (j in new_path_base) {
  # Iterating over the values directly (rather than 1:length(channel_n)) means
  # an empty channel list results in no calls instead of indices 1 and 0.
  for (channel in channel_n) {
    group_jobs_bash(path_base = j,
                    name = channel,
                    letter_row_interval = 1:16,
                    number_col_interval = 1:24)
  }
}
toc()

### Pushing metadata to S3 bucket
# The script is addressed relative to the current working directory, so this
# file has to be run from the directory containing sync_metadata_to_bucket.sh.
sync_status <- system("./sync_metadata_to_bucket.sh")
# system() returns the exit status of the command; surface a failed sync
# instead of discarding it, since nothing after this step would reveal it.
if (sync_status != 0) {
  warning(
    "sync_metadata_to_bucket.sh exited with status ", sync_status,
    "; metadata may not have been pushed to the bucket",
    call. = FALSE
  )
}