# Work from the project data directory; files written later with a relative
# save.filename presumably end up here.
# NOTE(review): hard-coded absolute path - only valid on hosts that have /DATA1.
setwd("/DATA1/R_package_development")
# TCGAbiolinks supplies GDCquery(), GDCdownload() and GDCprepare() used below.
library(TCGAbiolinks)

## 1. Primary tumor gene expression data download for GBM ##
# Build a GDC query for RNA-Seq gene expression quantification files of
# TCGA-GBM, restricted to primary tumor samples.
query_STAR_TP <- GDCquery(
  project = "TCGA-GBM",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  experimental.strategy = "RNA-Seq",
  workflow.type = "STAR - Counts", # only STAR - Counts is available here
  sample.type = c("Primary Tumor")
)
# Download the files matched by the query using the "api" method.
GDCdownload(query_STAR_TP, method = "api")
# Prepare the downloaded files into a single object and save it to disk.
# NOTE(review): the COAD section of this script uses the same save.filename and
# the same df_primary_RNA_seq* variable names, so running it replaces these GBM
# results - consider cohort-specific names.
GBM_Rnaseq_STAR_TP <- GDCprepare(
  query_STAR_TP,
  save = TRUE,
  save.filename = "1.- query_STAR_TP.rda"
)
# Pull the "unstranded" assay directly from the object's slots.
# NOTE(review): direct slot access depends on the object's internal layout
# (presumably a SummarizedExperiment) - accessor functions would be sturdier.
df_primary_RNA_seq <- GBM_Rnaseq_STAR_TP@assays@data@listData$unstranded
# Transpose so that the assay's columns become rows and its rows become columns.
df_primary_RNA_seq.t <- t(df_primary_RNA_seq)
dim(df_primary_RNA_seq.t)
# Label rows with the colData row names and columns with the gene_name metadata.
rownames(df_primary_RNA_seq.t) <- GBM_Rnaseq_STAR_TP@colData@rownames
colnames(df_primary_RNA_seq.t) <- GBM_Rnaseq_STAR_TP@rowRanges@elementMetadata@listData$gene_name
# Keep only the first 15 characters of every row name.
rownames(df_primary_RNA_seq.t) <- substr(rownames(df_primary_RNA_seq.t), 1, 15)

### Copy-number data download from UCSC Xena (GBM) ###
# Install UCSCXenaTools only when it is missing, so that re-running the script
# does not reinstall the package every time.
if (!requireNamespace("UCSCXenaTools", quietly = TRUE)) {
  install.packages("UCSCXenaTools")
}
library("UCSCXenaTools")
#data(XenaData)
#head(XenaData)
# The options in XenaFilter() support regular expressions, so a bare "GBM"
# pattern would also match the combined GBMLGG cohort. Anchor on the
# cohort-specific dataset prefix instead.
# Assumes tcgaHub dataset ids look like "TCGA.GBM.sampleMap/<name>" - check the
# printed df_todo to confirm only GBM datasets are selected.
df_todo <- XenaGenerate(subset = XenaHostNames == "tcgaHub") %>%
  XenaFilter(filterDatasets = "copynumber") %>%
  XenaFilter(filterDatasets = "TCGA\\.GBM\\.sampleMap")
df_todo
# Query the selected datasets and download them.
xe_download <- XenaQuery(df_todo) %>%
  XenaDownload()
# Enable the use_hiplot option and repeat the query + download.
# NOTE(review): this repeats the step above and overwrites xe_download; it looks
# like both alternatives from the package README were pasted - confirm which one
# is intended and drop the other.
options(use_hiplot = TRUE)
xe_download <- XenaQuery(df_todo) %>%
  XenaDownload()
# Load the downloaded files; the result is indexed by file name below.
cli <- XenaPrepare(xe_download)
class(cli)
#> [1] "list"
names(cli)
# Exact-name extraction with [[ ]] avoids the partial matching that $ performs.
GISTIC_CNV_all_data_by_genes <- cli[["Gistic2_CopyNumber_Gistic2_all_data_by_genes.gz"]]
GISTIC_CNV_all_threshold_by_genes <- cli[["Gistic2_CopyNumber_Gistic2_all_thresholded.by_genes.gz"]]
# Transpose the thresholded table and use its first row as column names.
# NOTE(review): the first row is still present as a data row afterwards, and
# the transposed matrix is presumably character-typed if the table's first
# column holds gene identifiers - confirm downstream code accounts for this.
CNV_data_transposed <- t(GISTIC_CNV_all_threshold_by_genes)
colnames(CNV_data_transposed) <- CNV_data_transposed[1, ]

## Clinical data for GBM ##
# Query the BCR Biotab clinical supplement files of TCGA-GBM.
query.GBM.clinic <- GDCquery(
  project = "TCGA-GBM",
  data.category = "Clinical",
  data.type = "Clinical Supplement",
  data.format = "BCR Biotab"
)
# Download into the project directory, 10 files per chunk.
GDCdownload(
  query.GBM.clinic,
  directory = "/DATA1/R_package_development",
  files.per.chunk = 10
)
# Prepare the downloaded files; the result is indexed by table name below.
data_clinical <- GDCprepare(
  query.GBM.clinic,
  save = TRUE,
  save.filename = "query.GBM.clinic",
  directory = "/DATA1/R_package_development",
  summarizedExperiment = TRUE
)
# Pick the patient-level table. The lookup is case-insensitive because the
# prepared table names are expected to be lower case ("clinical_patient_gbm"),
# in which case `$clinical_patient_GBM` would silently yield NULL.
patient_table <- "clinical_patient_gbm"
patient_idx <- match(patient_table, tolower(names(data_clinical)))
if (is.na(patient_idx)) {
  stop(
    "Table '", patient_table, "' not found in prepared clinical data; ",
    "available tables: ", paste(names(data_clinical), collapse = ", "),
    call. = FALSE
  )
}
Clinical_data_GBM <- data_clinical[[patient_idx]]


### for COAD dataset ###

## 1. Primary tumor gene expression data download for COAD ##

# Build a GDC query for RNA-Seq gene expression quantification files of
# TCGA-COAD, restricted to primary tumor samples.
# NOTE(review): this reuses the query_STAR_TP / df_primary_RNA_seq* variable
# names and the save.filename of the GBM section, replacing the GBM results -
# consider cohort-specific names.
query_STAR_TP <- GDCquery(
  project = "TCGA-COAD",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  experimental.strategy = "RNA-Seq",
  workflow.type = "STAR - Counts", # only STAR - Counts is available here
  sample.type = c("Primary Tumor")
)
# Download the files matched by the query using the "api" method.
GDCdownload(query_STAR_TP, method = "api")
# Prepare the downloaded files into a single object and save it to disk.
COAD_Rnaseq_STAR_TP <- GDCprepare(
  query_STAR_TP,
  save = TRUE,
  save.filename = "1.- query_STAR_TP.rda"
)
# Pull the "unstranded" assay directly from the object's slots.
# NOTE(review): direct slot access depends on the object's internal layout
# (presumably a SummarizedExperiment) - accessor functions would be sturdier.
df_primary_RNA_seq <- COAD_Rnaseq_STAR_TP@assays@data@listData$unstranded
# Transpose so that the assay's columns become rows and its rows become columns.
df_primary_RNA_seq.t <- t(df_primary_RNA_seq)
dim(df_primary_RNA_seq.t)
# Label rows with the colData row names and columns with the gene_name metadata.
rownames(df_primary_RNA_seq.t) <- COAD_Rnaseq_STAR_TP@colData@rownames
colnames(df_primary_RNA_seq.t) <- COAD_Rnaseq_STAR_TP@rowRanges@elementMetadata@listData$gene_name
# Keep only the first 15 characters of every row name.
rownames(df_primary_RNA_seq.t) <- substr(rownames(df_primary_RNA_seq.t), 1, 15)

### Copy-number data download from UCSC Xena (COAD) ###
# Install UCSCXenaTools only when it is missing, so that re-running the script
# does not reinstall the package every time.
if (!requireNamespace("UCSCXenaTools", quietly = TRUE)) {
  install.packages("UCSCXenaTools")
}
library("UCSCXenaTools")
#data(XenaData)
#head(XenaData)
# The options in XenaFilter() support regular expressions, so a bare "COAD"
# pattern would also match the combined COADREAD cohort. Anchor on the
# cohort-specific dataset prefix instead.
# Assumes tcgaHub dataset ids look like "TCGA.COAD.sampleMap/<name>" - check
# the printed df_todo to confirm only COAD datasets are selected.
df_todo <- XenaGenerate(subset = XenaHostNames == "tcgaHub") %>%
  XenaFilter(filterDatasets = "copynumber") %>%
  XenaFilter(filterDatasets = "TCGA\\.COAD\\.sampleMap")
df_todo
# Query the selected datasets and download them.
xe_download <- XenaQuery(df_todo) %>%
  XenaDownload()
# Enable the use_hiplot option and repeat the query + download.
# NOTE(review): this repeats the step above and overwrites xe_download; it looks
# like both alternatives from the package README were pasted - confirm which one
# is intended and drop the other.
options(use_hiplot = TRUE)
xe_download <- XenaQuery(df_todo) %>%
  XenaDownload()
# Load the downloaded files; the result is indexed by file name below.
cli <- XenaPrepare(xe_download)
class(cli)
#> [1] "list"
names(cli)
# Exact-name extraction with [[ ]] avoids the partial matching that $ performs.
GISTIC_CNV_all_data_by_genes <- cli[["Gistic2_CopyNumber_Gistic2_all_data_by_genes.gz"]]
GISTIC_CNV_all_threshold_by_genes <- cli[["Gistic2_CopyNumber_Gistic2_all_thresholded.by_genes.gz"]]
# Transpose the thresholded table and use its first row as column names.
# NOTE(review): the first row is still present as a data row afterwards, and
# the transposed matrix is presumably character-typed if the table's first
# column holds gene identifiers - confirm downstream code accounts for this.
CNV_data_transposed <- t(GISTIC_CNV_all_threshold_by_genes)
colnames(CNV_data_transposed) <- CNV_data_transposed[1, ]

## Clinical data for COAD ##
# Query the BCR Biotab clinical supplement files of TCGA-COAD.
query.COAD.clinic <- GDCquery(
  project = "TCGA-COAD",
  data.category = "Clinical",
  data.type = "Clinical Supplement",
  data.format = "BCR Biotab"
)
# Download into the project directory, 10 files per chunk.
GDCdownload(
  query.COAD.clinic,
  directory = "/DATA1/R_package_development",
  files.per.chunk = 10
)
# Prepare the downloaded files; the result is indexed by table name below.
data_clinical <- GDCprepare(
  query.COAD.clinic,
  save = TRUE,
  save.filename = "query.COAD.clinic",
  directory = "/DATA1/R_package_development",
  summarizedExperiment = TRUE
)
# Pick the patient-level table. The lookup is case-insensitive because the
# prepared table names are expected to be lower case ("clinical_patient_coad"),
# in which case `$clinical_patient_COAD` would silently yield NULL.
patient_table <- "clinical_patient_coad"
patient_idx <- match(patient_table, tolower(names(data_clinical)))
if (is.na(patient_idx)) {
  stop(
    "Table '", patient_table, "' not found in prepared clinical data; ",
    "available tables: ", paste(names(data_clinical), collapse = ", "),
    call. = FALSE
  )
}
Clinical_data_COAD <- data_clinical[[patient_idx]]
