Skip to content

Commit 915634b

Browse files
committed
Merge branch 'master' of https://github.com/IFB-ElixirFr/EBAII
2 parents 9f7a403 + 76232d2 commit 915634b

File tree

6 files changed

+268
-146
lines changed

6 files changed

+268
-146
lines changed

2024/ebaiin1/chip-seq/README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## EBAII 2024 - ChIP-seq course
1+
## EBAII 2025 - ChIP-seq course
22

33
### Hands-on
44
Go to [hands-on](hands-on/hands-on.html)
@@ -14,16 +14,16 @@ Go to [hands-on](hands-on/hands-on.html)
1414
| 12:45 | 14:30 | 01:45 | **Lunch Break** | |
1515
| 14:30 | 16:00 | 01:30 | Mapping | Stéphanie Le Gras |
1616
| 16:00 | 16:30 | 00:30 | **Break** | |
17-
| 16:30 | 17:45 | 01:15 | Mapping QC, Visualization | Tao Ye |
18-
| 17:45 | 19:00 | 01:15 | Mapping QC, Visualization, Peak calling | Tao Ye |
17+
| 16:30 | 17:45 | 01:15 | Mapping QC, Visualization | Pascal Martin |
18+
| 17:45 | 19:00 | 01:15 | Mapping QC, Visualization, Peak calling | Pascal Martin |
1919

2020

2121
#### Wednesday
2222

2323

2424
| **Start** | **End** | **Duration** | **Topics** | **Teacher** |
2525
| -------- | --------- | --------- | ----------- | ----------- |
26-
| 10:45 | 12:25 | 02:00 | Motif analysis | Morgane THOMAS-CHOLLIER |
26+
| 10:45 | 12:45 | 02:00 | Motif analysis | Elodie Darbo |
2727
| 12:45 | 14:30 | 01:45 | **Break** | |
2828
| 14:30 | 16:00 | 01:30 | Free time | |
2929
| 16:00 | 16:30 | 00:30 | **Break** | |
@@ -35,4 +35,4 @@ Go to [hands-on](hands-on/hands-on.html)
3535

3636
| **Start** | **End** | **Duration** | **Topics** | **Teacher** |
3737
| -------- | --------- | --------- | ----------- | ----------- |
38-
| 8:30 | 10:15 | 01:45 | Scripting / Workflow | Elodie Darbo |
38+
| 8:30 | 10:15 | 01:45 | Scripting / Workflow | Rachel Legendre |
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
## - Associate peaks to closest genes
2+
# 5. Add gene symbol annotation using R with Rstudio
3+
4+
setwd("/shared/projects/2538_eb3i_n1_2025/atelier_chipseq/EBAII2025_chipseq/07-PeakAnnotation")
5+
6+
d <- read.table("FNR_anaerobic_idr_annotated_peaks.tsv", sep="\t", header=TRUE)
7+
8+
gene.symbol <- read.table("../data/Escherichia_coli_K_12_MG1655.annotation.tsv.gz", header=FALSE)
9+
10+
d.annot <- merge(d[,c(1,2,3,4,5,6,8,10,11)], gene.symbol, by.x="Nearest.PromoterID", by.y="V1")
11+
12+
colnames(d.annot)[2] <- "PeakID" # name the 2d column of the new file "PeakID"
13+
colnames(d.annot)[dim(d.annot)[2]] <- "Gene.Symbol"
14+
write.table(d.annot, "FNR_anaerobic_idr_final_peaks_annotation.tsv", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE)
15+
16+
## - Performing a first evaluation of peak sets using R
17+
# 1. Go to Rstudio and execute the R code below (show results in the report)
18+
19+
library(RColorBrewer)
20+
library(ChIPseeker)
21+
library(TxDb.Mmusculus.UCSC.mm9.knownGene)
22+
library(org.Mm.eg.db)
23+
# define the annotation of the mouse genome
24+
txdb = TxDb.Mmusculus.UCSC.mm9.knownGene
25+
# define colors
26+
col = brewer.pal(9,'Set1')
27+
28+
# read the peaks for each dataset
29+
peaks.forebrain = readPeakFile('GSM348064_p300_peaks.txt.gz')
30+
peaks.midbrain = readPeakFile('GSM348065_p300_peaks.txt.gz')
31+
peaks.limb = readPeakFile('GSM348066_p300_peaks.txt.gz')
32+
33+
# create a list containing all the peak sets
34+
all.peaks = list(forebrain=peaks.forebrain,
35+
midbrain=peaks.midbrain,
36+
limb=peaks.limb)
37+
38+
# check the number of peaks for the forebrain dataset
39+
length(peaks.forebrain)
40+
41+
# compute the number of peaks for all datasets using the list object
42+
sapply(all.peaks,length)
43+
44+
# display this as a barplot
45+
barplot(sapply(all.peaks,length),col=col)
46+
47+
# statistics on the peak length for forebrain
48+
summary(width(peaks.forebrain))
49+
50+
# size distribution of the peaks
51+
peaks.width = lapply(all.peaks,width)
52+
lapply(peaks.width,summary)
53+
54+
# boxplot of the sizes
55+
boxplot(peaks.width,col=col)
56+
57+
# genome wide distribution
58+
covplot(peaks.forebrain, weightCol="Maximum.Peak.Height")
59+
60+
# define gene promoters
61+
promoter = getPromoters(TxDb=txdb, upstream=5000, downstream=5000)
62+
63+
# compute the density of peaks within the promoter regions
64+
tagMatrix = getTagMatrix(peaks.limb, windows=promoter)
65+
66+
# plot the density
67+
tagHeatmap(tagMatrix, palette = "RdYlBu")
68+
69+
peakAnno.forebrain = annotatePeak(peaks.forebrain, tssRegion=c(-3000, 3000), TxDb=txdb, annoDb="org.Mm.eg.db")
70+
peakAnno.midbrain = annotatePeak(peaks.midbrain, tssRegion=c(-3000, 3000), TxDb=txdb, annoDb="org.Mm.eg.db")
71+
peakAnno.limb = annotatePeak(peaks.limb, tssRegion=c(-3000, 3000), TxDb=txdb, annoDb="org.Mm.eg.db")
72+
73+
# distribution of genomic compartments for forebrain peaks
74+
plotAnnoPie(peakAnno.forebrain)
75+
76+
# for all the peaks
77+
plotAnnoBar(list(forebrain=peakAnno.forebrain, midbrain=peakAnno.midbrain,limb=peakAnno.limb))
78+
79+
80+
### - functional annotation
81+
82+
# load the library
83+
library(clusterProfiler)
84+
85+
# define the list of all mouse genes as a universe for the enrichment analysis
86+
universe = mappedkeys(org.Mm.egACCNUM)
87+
88+
## extract the gene IDs of the forebrain target genes
89+
genes.forebrain = peakAnno.forebrain@anno$geneId
90+
ego.forebrain = enrichGO(gene = genes.forebrain,
91+
universe = universe,
92+
OrgDb = org.Mm.eg.db,
93+
ont = "BP",
94+
pAdjustMethod = "BH",
95+
pvalueCutoff = 0.01,
96+
qvalueCutoff = 0.05,
97+
readable = TRUE)
98+
99+
# display the results as barplots
100+
barplot(ego.forebrain,showCategory=10)
101+
102+
103+
104+
105+
106+
107+
108+

2024/ebaiin1/chip-seq/hands-on/hands-on.Rmd

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ Bowtie output is a [SAM](https://samtools.github.io/hts-specs/SAMv1.pdf) file. T
294294
* -o: to specify an output file name
295295
```{bash eval=FALSE}
296296
## First load samtools
297-
module load samtools/1.18
297+
module load samtools/1.21
298298
## Then run samtools
299299
samtools view -@ 2 -q 10 -b FNR_IP_ChIP-seq_Anaerobic_A.sam | samtools sort -@ 2 - -o FNR_IP_ChIP-seq_Anaerobic_A.bam
300300
```
@@ -311,7 +311,7 @@ gzip FNR_IP_ChIP-seq_Anaerobic_A.sam
311311

312312
7. Once it's done, unload the tools you used
313313
```{bash eval=FALSE}
314-
module unload samtools/1.18 bowtie2/2.5.1
314+
module unload samtools/1.21 bowtie2/2.5.1
315315
```
316316

317317
## - Map the second replicate and the control
@@ -350,23 +350,23 @@ To determine the number of duplicated reads marked by Picard, we can run the `sa
350350

351351
```{bash eval=FALSE}
352352
## Add samtools to your environment
353-
module load samtools/1.18
353+
module load samtools/1.21
354354
## run samtools
355355
samtools flagstat Marked_FNR_IP_ChIP-seq_Anaerobic_A.bam
356356
```
357357

358358
**Run picard MarkDuplicates on the 2 other samples. How many duplicates are found in each sample?**
359359

360-
Go back to working home directory (i.e /shared/projects/<your_project>/EBAII2025_chipseq/)
360+
Go back to working home directory (i.e /shared/projects/\<your_project\>/EBAII2025_chipseq/)
361361
```{bash eval=FALSE}
362362
## Unload picard and samtools
363-
module unload samtools/1.18 picard/2.23.5
363+
module unload samtools/1.21 picard/2.23.5
364364
## If you are in 02-Mapping/bam
365365
cd ../..
366366
```
367367

368368
# - ChIP quality controls
369-
**Goal**: This exercise aims at plotting the **Lorenz curve** to assess the quality of the chIP.
369+
**Goal**: This exercise aims at plotting the **Lorenz curve** to assess the quality of the ChIP.
370370

371371
## - Plot the Lorenz curve with Deeptools
372372
1. Create a directory named **03-ChIPQualityControls** in which to put mapping results for IP
@@ -396,13 +396,14 @@ plotFingerprint \
396396
```
397397
4. If plotFingerprint takes too much time to run, take the file that has already been prepared for the training.
398398
```{bash eval=FALSE}
399-
cp /shared/home/slegras/2421_m22_bims/slegras/03-ChIPQualityControls/fingerprint.png .
399+
cp /shared/projects/2538_eb3i_n1_2025/atelier_chipseq/EBAII2025_chipseq/03-ChIPQualityControls/fingerprint_10000.png .
400400
```
401-
5. Go find the file using the directory tree on the left of the Jupyterlab panel and click on the fingerprint.png file to display it in Jupyterlab.
401+
5. Go find the file using the directory tree on the left of the Jupyterlab panel and click on the fingerprint_10000.png file to display it in Jupyterlab.
402402

403403
**Look at the result file fingerprint.png (or fingerprint_10000.png if you copied the prepared file) and add the plot to this report. How do you explain the curves?**
404404

405-
Go back to the working home directory (i.e /shared/projects/2421_m22_bims/\<login\>)
405+
Go back to working home directory (i.e /shared/projects/\<your_project\>/EBAII2025_chipseq)
406+
406407
```{bash eval=FALSE}
407408
## Unload deepTools
408409
module unload deeptools/3.5.4
@@ -514,7 +515,7 @@ bamCoverage \
514515
**Go back to the genes we looked at earlier: pepT, ycfP (add screenshots to this report). Look at the shape of the signal.**
515516
**Keep IGV opened.**
516517

517-
Go back to working home directory (i.e /shared/projects/<your_project>/EBAII2025_chipseq)
518+
Go back to working home directory (i.e /shared/projects/\<your_project\>/EBAII2025_chipseq)
518519
```{bash eval=FALSE}
519520
## If you are in 04-Visualization
520521
cd ..
@@ -650,7 +651,7 @@ idr \
650651

651652
**Add the IDR graph to this report. How many peaks are found with the IDR method?**
652653

653-
4. Remove IDR and MACS2 from your environment and go back to working home directory (i.e /shared/projects/<your_project>/EBAII2025_chipseq)
654+
4. Remove IDR and MACS2 from your environment and go back to working home directory (i.e /shared/projects/\<your_project\>/EBAII2025_chipseq)
654655
```{bash eval=FALSE}
655656
module unload macs2/2.2.7.1
656657
module unload idr/2.0.4.2
@@ -711,7 +712,7 @@ Your directory structure should be like this:
711712
3. Extract peak sequence in fasta format
712713
```{bash eval=FALSE}
713714
## First load samtools
714-
module load samtools/1.18
715+
module load samtools/1.21
715716
## Create an index of the genome fasta file
716717
samtools faidx ../data/Escherichia_coli_K12.fasta
717718
@@ -727,7 +728,7 @@ bedtools getfasta \
727728

728729
## - Motif discovery with RSAT
729730
1. Open a connection to a Regulatory Sequence Analysis Tools server. You can choose between various website mirrors.
730-
* Teaching Server (recommended for this training) [https://rsat.france-bioinformatique.fr/teaching/](https://rsat.france-bioinformatique.fr/teaching/)
731+
* Teaching Server (recommended for this training) [https://rsat.eead.csic.es/plants/](https://rsat.eead.csic.es/plants/)
731732
2. In the left menu, click on **NGS ChIP-seq** and then click on **peak-motifs**. A new page opens, with a form
732733
3. The default peak-motifs web form only displays the essential options. There are only two mandatory parameters.
733734
* The **title box**, which you will set as **FNR Anaerobic**. The **sequences**, which you will **upload from your computer** by clicking on the button Choose file and selecting the file **FNR_anaerobic_idr_peaks.fa** from your computer.
@@ -844,7 +845,7 @@ write.table(d.annot, "FNR_anaerobic_idr_final_peaks_annotation.tsv", col.names=T
844845

845846
**What are all the possible gene features? (see in column Annotation - extract information like promoter-TSS, TSS, ...). Create a plot (pie chart, barplot...) showing the proportion of each of them (include both the plot and the code that created it in the report).**
846847

847-
6. Go back to working home directory (i.e /shared/projects/training/\<login\>/M2.2-BIMS-epigenomique)
848+
6. Go back to working home directory (i.e /shared/projects/\<your_project\>/EBAII2025_chipseq)
848849
```{bash eval=FALSE}
849850
## If you are in 07-PeakAnnotation
850851
cd ..
@@ -878,8 +879,6 @@ You should now have downloaded 3 files:
878879
> GSM348066_p300_peaks.txt.gz (limb)
879880
880881
```{bash eval=TRUE, include=FALSE}
881-
mkdir 07-PeakAnnotation-bonus
882-
cd 07-PeakAnnotation-bonus
883882
curl -O https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM348nnn/GSM348064/suppl/GSM348064_p300_peaks.txt.gz
884883
curl -O https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM348nnn/GSM348065/suppl/GSM348065_p300_peaks.txt.gz
885884
curl -O https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM348nnn/GSM348066/suppl/GSM348066_p300_peaks.txt.gz

2024/ebaiin1/chip-seq/hands-on/hands-on.html

Lines changed: 55 additions & 55 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)