#==============================================================================
# Dataset: Africa YouTube species data analysis
# Author: Anthony Basooma
# Section: Fish Biology and Ecology (Ecological Modelling)
# Organisation: National Fisheries Resources Research Institute
#==============================================================================

# Set the working directory
setwd(r"(C:\Users\anthbasooma\Documents\Anthony\personal publications\youtube)")
#load('sentiment.RData')

#==============================================================================
# Packages
library(tidyr)
library(dplyr)
library(ggplot2)
library(extrafont)
library(lubridate)
library(caret)
library(vosonSML)
library(syuzhet)
library(tm)
library(e1071)
library(GGally)
library(ggwordcloud)
library(tidytext)
library(sf)
library(ggspatial)

options(scipen = 999)
options(dplyr.summarise.inform = FALSE)

# Load the video dataset and derive per-day engagement metrics
youtube <- read.csv(file = "smedia.csv", header = TRUE, strip.white = TRUE,
                    sep = ',', na.strings = c('', NA)) %>%
  filter(!views > 3e7) %>%
  mutate(yposted = as.factor(year(dposted)),
         ddiff = abs(time_length(interval(ymd(dposted), ymd(dret)), "day")),
         vpd = views/ddiff,      # views per day
         cts = 1,
         dpd = unlikes/ddiff,    # dislikes per day
         Duration = Duration/100,
         lpd = likes/ddiff,      # likes per day
         cpd = comments/ddiff,   # comments per day
         title_len = nchar(iconv(title, "latin1", "UTF-8", sub = '')))

# Administrative boundaries for mapping channel origins
admbound <- sf::st_read('ne_10m_admin_0_countries/newadminlayer.shp', quiet = TRUE)

# Remove duplicate titles (drop the second record of any duplicated title)
ytube <- youtube %>% group_by(title) %>% filter(!row_number() == 2)

mdata <- ytube %>% group_by(message) %>% summarise(cts = length(message))

#==============================================================================
# Descriptive statistics
des <- ytube %>%
  mutate(platform = 'YT') %>%
  group_by(platform) %>%
  summarise(totalviews = sum(views),
            totallike = sum(likes),
            totdur = sum(Duration),
            totaldilikes = sum(unlikes),
            totaldcomments = sum(comments, na.rm = TRUE))

# Total subscribers, counting each channel once at its maximum subscriber count
des <- ytube %>%
  mutate(platform = 'YT') %>%
  group_by(platform, channel) %>%
  slice(which.max(subscriber)) %>%
  group_by(platform) %>%
  summarise(totalviews = sum(subscriber))

#==============================================================================
# 1a. Are videos specific to a particular region in Africa?
region <- ytube %>%
  group_by(region) %>%
  summarise(totvideo = length(vpd),
            meanvpd = mean(vpd),
            meanlikes = mean(lpd),
            meanun = mean(dpd),
            meancpd = mean(cpd, na.rm = TRUE),
            sdvpd = sd(vpd)/sqrt(length(vpd)),
            sdlikes = sd(lpd)/sqrt(length(vpd)),
            sdun = sd(dpd)/sqrt(length(vpd)),
            sdcpd = sd(cpd, na.rm = TRUE)/sqrt(length(cpd))) %>%
  mutate(meanvpd1 = paste(round(meanvpd, 2), '±', round(sdvpd, 2)),
         meanlike1 = paste(round(meanlikes, 2), '±', round(sdlikes, 2)),
         meanun1 = paste(round(meanun, 2), '±', round(sdun, 2)),
         meancpd1 = paste(round(meancpd, 2), '±', round(sdcpd, 2))) %>%
  dplyr::select(1, 2, 11:14)

xlsx::write.xlsx(region, 'regional2.xlsx')

# Regional differences in engagement (Kruskal-Wallis)
kruskal.test(vpd ~ region, data = ytube)
kruskal.test(lpd ~ region, data = ytube)
kruskal.test(dpd ~ region, data = ytube)
kruskal.test(cpd ~ region, data = ytube)
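#------------------------------------------------------------------------------
# Optional check (not part of the original workflow): if any regional
# Kruskal-Wallis test above is significant, a post-hoc Dunn test shows which
# regions differ. A minimal sketch mirroring the country-level Dunn test used
# later in this script; the 0.025 cut-off is an assumption carried over from
# that code.
dn_region <- as.data.frame(
  dunn.test::dunn.test(ytube$vpd, ytube$region, list = TRUE)) %>%
  filter(P <= 0.025)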
#==============================================================================
# 1b. At country level
geodata <- ytube %>% filter(nature %in% 'country')

ctry <- geodata %>%
  group_by(geo) %>%
  summarise(totvideo = length(vpd),
            meanvpd = mean(vpd),
            meanlikes = mean(lpd),
            meanun = mean(dpd),
            meancpd = mean(cpd, na.rm = TRUE),
            sdvpd = sd(vpd)/sqrt(length(vpd)),
            sdlikes = sd(lpd)/sqrt(length(vpd)),
            sdun = sd(dpd)/sqrt(length(vpd)),
            sdcpd = sd(cpd, na.rm = TRUE)/sqrt(length(cpd))) %>%
  mutate(meanvpd1 = paste(round(meanvpd, 2), '±', round(sdvpd, 2)),
         meanlike1 = paste(round(meanlikes, 2), '±', round(sdlikes, 2)),
         meanun1 = paste(round(meanun, 2), '±', round(sdun, 2)),
         meancpd1 = paste(round(meancpd, 2), '±', round(sdcpd, 2))) %>%
  dplyr::select(1, 2, 11:14)

kruskal.test(vpd ~ geo, data = geodata)
kruskal.test(lpd ~ geo, data = geodata)
kruskal.test(dpd ~ geo, data = geodata)
kruskal.test(cpd ~ geo, data = geodata)

#==============================================================================
# Graphical presentation
geosum <- geodata %>%
  ungroup() %>%
  dplyr::select(geo, vpd, lpd, cpd) %>%
  group_by(geo) %>%
  mutate(nv = length(vpd)) %>%
  gather(cates, cvalue, -c(geo, nv)) %>%
  group_by(geo, cates) %>%
  summarise(nv = log(mean(nv)),
            meancates = log(mean(cvalue, na.rm = TRUE)),
            sdvpd = log(sd(cvalue, na.rm = TRUE)/sqrt(length(cvalue)))) %>%
  mutate(cates = case_when(cates == 'vpd' ~ 'Views per day',
                           cates == 'lpd' ~ 'Likes per day',
                           TRUE ~ 'Comments per day'),
         cates = factor(cates, levels = c("Views per day", "Likes per day",
                                          "Comments per day")))

#==============================================================================
# Pie chart for country video posting
ctvideo <- geodata %>%
  group_by(geo) %>%
  summarise(ct = length(geo)) %>%
  mutate(ctry = case_when(ct > 10 ~ geo, TRUE ~ 'Others')) %>%
  group_by(ctry) %>%
  summarise(ct = sum(ct)) %>%
  mutate(per = prop.table(ct) * 100)

ggplot(data = ctvideo, aes(x = "", y = per, fill = ctry)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y", start = 0) +
  scale_fill_viridis_d(direction = -1) +
  theme_void() +
  geom_text(aes(label = paste(ctry, '\n', round(per, 2), '%')),
            position = position_stack(vjust = 0.5),
            family = 'Cambria', color = 'black') +
  theme(text = element_text(family = 'Cambria', size = 11),
        legend.position = 'None')

#==============================================================================
bg <- theme(panel.background = element_blank(),
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            legend.background = element_blank())

library(tidytext)

ggplot(geosum, aes(x = reorder_within(geo, meancates, cates),
                   y = meancates, fill = cates)) +
  geom_bar(stat = 'identity') +
  coord_flip() +
  scale_x_reordered() +
  facet_wrap(~cates, scales = 'free', ncol = 4) +
  theme_bw() + #bg+
  scale_y_continuous(expand = expansion(mult = c(0, 0.051))) +
  scale_fill_manual(values = c('grey20', 'grey50', 'grey80')) +
  geom_errorbar(aes(ymin = meancates, ymax = meancates + sdvpd), width = 0.5) +
  theme(#text = element_text(family = 'Cambria', size = 10),
        axis.text.y = element_text(hjust = 0),
        #axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = 'None') +
  #geom_text(aes(label= nv, color=nv), size= 3, hjust = -7.2)+
  scale_color_gradient(low = 'grey30', high = 'grey5') +
  labs(y = 'Mean views, likes, and comments (±SE)', x = 'Countries',
       color = 'Videos')

# New plot
ggplot(data = geosum, #%>% filter(cates=='Views per day'),
       aes(x = reorder_within(geo, meancates, cates), y = meancates,
           fill = cates, color = cates, shape = cates)) +
  geom_point() +
  coord_flip() +
  scale_x_reordered() +
  theme_bw() + bg +
  facet_wrap(~cates, scales = 'free', ncol = 4) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.051))) +
  geom_errorbar(aes(ymin = meancates - sdvpd, ymax = meancates + sdvpd),
                width = 0.5, linetype = 'solid') +
  theme(axis.text.y = element_text(hjust = 0),
        text = element_text(size = 13),
        legend.position = 'None') +
  ggpubr::stat_compare_means(label = "p.signif", method = "wilcox.test",
                             ref.group = ".all.") +
  scale_color_manual(values = c('darkblue', 'darkcyan', 'darkorange4')) +
  geom_hline(yintercept = 0, linetype = 'dotted', color = 'grey50',
             linewidth = 0.5) +
  labs(y = 'Logarithmic mean views, likes and comments per day (±SE)',
       x = 'Countries', color = 'Videos')

ggsave('reviews/vpdlpd1_new.png', dpi = 1000, width = 10.4, height = 6.4,
       units = 'in')
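#------------------------------------------------------------------------------
# Optional cross-check (not part of the original workflow): stat_compare_means()
# above only annotates the plot of country means, so the same country contrasts
# can be checked in tabular form on the raw per-video values with base R's
# pairwise Wilcoxon test. A minimal sketch; the Benjamini-Hochberg adjustment
# is an assumption.
pairwise.wilcox.test(geodata$vpd, geodata$geo, p.adjust.method = "BH")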
# Post-hoc Dunn test for country differences in likes per day
dn <- as.data.frame(dunn.test::dunn.test(geodata$lpd, geodata$geo,
                                         list = TRUE)) %>%
  filter(P <= 0.025)
xlsx::write.xlsx(dn, 'differencescountriesdpd.xlsx')

#==============================================================================
# Ecoregions
ecoregions <- st_read('reviews/Africa_Ecoregions/ecoregion_cleaned.shp',
                      quiet = TRUE)
ecoregions1 <- st_transform(ecoregions, crs = 4326)

centroids <- read.csv(file = 'reviews/centroids.csv') %>% select(-c(1, 3))

# Merge with country centroids and convert to point geometries
geocentroid <- as.data.frame(geodata) %>%
  mutate(id = row_number()) %>%
  ungroup() %>%
  select(id, geo, vpd, cpd, lpd) %>%
  left_join(y = centroids %>% rename(geo = name), by = 'geo') %>%
  filter(!is.na(lat)) %>%
  sf::st_as_sf(coords = c('lon', 'lat'), crs = st_crs(4326))

# Spatial join: assign each video (via its country centroid) to an ecoregion
ecojoin <- st_join(geocentroid, st_make_valid(ecoregions1),
                   join = st_intersects)

# Videos and mean engagement per ecoregion, binned for mapping
vpbiom <- ecojoin %>%
  st_drop_geometry() %>%
  group_by(ECO_NAME) %>%
  summarise(nv = length(ECO_NAME),
            meanvpd = mean(vpd, na.rm = TRUE),
            meanlpd = mean(lpd, na.rm = TRUE),
            meancpd = mean(cpd, na.rm = TRUE)) %>%
  filter(!is.na(ECO_NAME)) %>%
  mutate(nv2 = case_when(nv > 0 & nv < 25 ~ '1-25',
                         nv >= 25 & nv < 50 ~ '25-50',
                         nv >= 50 & nv < 75 ~ '50-75',
                         nv >= 75 & nv < 100 ~ '75-100'),
         meanvp = case_when(meanvpd > 0 & meanvpd < 200 ~ '1-200',
                            meanvpd >= 200 & meanvpd < 400 ~ '200-400',
                            meanvpd >= 400 & meanvpd < 600 ~ '400-600',
                            meanvpd >= 600 & meanvpd < 800 ~ '600-800'),
         meanlp = case_when(meanlpd > 0 & meanlpd < 3 ~ '1-3',
                            meanlpd >= 3 & meanlpd < 6 ~ '3-6',
                            meanlpd >= 6 & meanlpd < 9 ~ '6-9',
                            meanlpd >= 9 & meanlpd < 12 ~ '9-12'),
         meancp = case_when(meancpd > 0 & meancpd < 0.4 ~ '0.1-0.4',
                            meancpd >= 0.4 & meancpd < 0.8 ~ '0.4-0.8',
                            meancpd >= 0.8 & meancpd < 1.4 ~ '0.8-1.4'))

# Rejoin the summaries onto the ecoregion polygons
ecof <- ecoregions1 %>% left_join(vpbiom, by = 'ECO_NAME') %>%
  replace(is.na(.), 0)

bg2 <- theme(legend.position.inside = c(0.2, 0.3),
             legend.position = 'inside',
             text = element_text(size = 10),
             legend.text = element_text(size = 10),
             plot.margin = unit(c(0, 0, 0, 0), units = 'cm'))

vpdd <- ggplot() +
  geom_sf(data = ecof, aes(fill = nv2)) +
  scale_fill_viridis_d() +
  theme_bw() + bg + bg2 +
  labs(x = 'Longitude', y = 'Latitude', fill = 'videos', title = '(A)')

mvpd <- ggplot() +
  geom_sf(data = ecof, aes(fill = meanvp)) +
  scale_fill_viridis_d() +
  theme_bw() + bg + bg2 +
  labs(x = 'Longitude', y = 'Latitude', fill = 'Mean vpd', title = '(B)')

mlpd <- ggplot() +
  geom_sf(data = ecof, aes(fill = meanlp)) +
  scale_fill_viridis_d() +
  theme_bw() + bg + bg2 +
  labs(x = 'Longitude', y = 'Latitude', fill = 'Mean lpd', title = '(C)')

mcpd <- ggplot() +
  geom_sf(data = ecof, aes(fill = meancp)) +
  scale_fill_viridis_d() +
  theme_bw() + bg + bg2 +
  labs(x = 'Longitude', y = 'Latitude', fill = 'Mean cpd', title = '(D)')

library(gridExtra)
gplots <- grid.arrange(vpdd, mvpd, mlpd, mcpd, nrow = 2)
ggsave(filename = "reviews/ecoregions.png", gplots, width = 6.5, height = 6.8,
       dpi = 600, units = 'in')
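#------------------------------------------------------------------------------
# Optional sanity checks (not part of the original workflow) for the spatial
# join above: confirm both layers share the same CRS and count how many country
# centroids fell outside every ecoregion polygon. A minimal sketch using only
# objects created above.
st_crs(geocentroid) == st_crs(ecoregions1)   # should be TRUE (both EPSG:4326)
sum(is.na(ecojoin$ECO_NAME))                 # centroids with no ecoregion match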
#==============================================================================
# Engagement across taxonomic groups
# Keep one record per title and lower taxon; per-taxon counts go into a
# separate object so the per-day metrics remain available for the tests below
taxa <- youtube %>%
  filter(biota %in% 'Yes') %>%
  group_by(title, lowertaxa) %>%
  filter(!row_number() == 2) %>%
  ungroup()

taxacount <- taxa %>% group_by(hightaxa) %>% summarise(ct = length(hightaxa))

taxasum <- taxa %>%
  group_by(hightaxa) %>%
  summarise(totvideo = length(vpd),
            meanvpd = mean(vpd),
            meanlikes = mean(lpd),
            meanun = mean(dpd),
            meancpd = mean(cpd, na.rm = TRUE),
            sdvpd = sd(vpd)/sqrt(length(vpd)),
            sdlikes = sd(lpd)/sqrt(length(vpd)),
            sdun = sd(dpd)/sqrt(length(vpd)),
            sdcpd = sd(cpd, na.rm = TRUE)/sqrt(length(cpd))) %>%
  mutate(meanvpd1 = paste(round(meanvpd, 2), '±', round(sdvpd, 2)),
         meanlike1 = paste(round(meanlikes, 2), '±', round(sdlikes, 2)),
         meanun1 = paste(round(meanun, 2), '±', round(sdun, 2)),
         meancpd1 = paste(round(meancpd, 2), '±', round(sdcpd, 2))) %>%
  dplyr::select(1, 2, 11:14)

xlsx::write.xlsx(taxasum, 'meanacrosstaxa.xlsx')

# Differences across higher taxa (Kruskal-Wallis)
kruskal.test(vpd ~ hightaxa, data = taxa)
kruskal.test(lpd ~ hightaxa, data = taxa)
kruskal.test(dpd ~ hightaxa, data = taxa)
kruskal.test(cpd ~ hightaxa, data = taxa)

dn <- as.data.frame(dunn.test::dunn.test(taxa$vpd, taxa$hightaxa,
                                         list = TRUE)) %>%
  filter(P <= 0.025)
write.csv(dn, 'reviews/differences.csv', row.names = FALSE)

#==============================================================================
# Specific (lower) taxa: keep taxa represented by more than one video
taxaspec <- taxa %>%
  group_by(lowertaxa) %>%
  mutate(ct = length(lowertaxa)) %>%
  filter(ct != 1, !is.na(lowertaxa)) #%>%
  #group_by(lowertaxa) %>% summarise(ct = length(lowertaxa))

write.csv(taxaspec, 'lowertaxacount.csv', row.names = FALSE)

taxasum <- taxaspec %>%
  group_by(lowertaxa) %>%
  summarise(totvideo = length(vpd),
            meanvpd = mean(vpd),
            meanlikes = mean(lpd),
            meanun = mean(dpd),
            meancpd = mean(cpd, na.rm = TRUE),
            sdvpd = sd(vpd)/sqrt(length(vpd)),
            sdlikes = sd(lpd)/sqrt(length(vpd)),
            sdun = sd(dpd)/sqrt(length(vpd)),
            sdcpd = sd(cpd, na.rm = TRUE)/sqrt(length(cpd)))

# Differences across lower taxa (Kruskal-Wallis)
kruskal.test(vpd ~ lowertaxa, data = taxaspec)
kruskal.test(lpd ~ lowertaxa, data = taxaspec)
kruskal.test(dpd ~ lowertaxa, data = taxaspec)
kruskal.test(cpd ~ lowertaxa, data = taxaspec)

dn <- as.data.frame(dunn.test::dunn.test(taxaspec$vpd, taxaspec$lowertaxa,
                                         list = TRUE)) %>%
  filter(P <= 0.025)
write.csv(dn, 'reviews/lowertaxadifferences.csv', row.names = FALSE)
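#------------------------------------------------------------------------------
# Optional addition (not part of the original workflow): the Kruskal-Wallis
# tests above report significance but not magnitude. rstatix::kruskal_effsize()
# returns the eta-squared effect size for the same comparison; a minimal sketch
# for views per day across lower taxa, assuming the rstatix package is
# installed (it is attached later in this script for the Dunn tests).
taxaspec %>% ungroup() %>% rstatix::kruskal_effsize(vpd ~ lowertaxa)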
#==============================================================================
# Graphical presentation of lower taxa
taxaplot <- taxaspec %>%
  dplyr::select(hightaxa, lowertaxa, vpd, lpd, cpd) %>%
  group_by(hightaxa) %>%
  mutate(nv = length(vpd)) %>%
  gather(cates, cvalue, -c(hightaxa, nv, lowertaxa)) %>%
  group_by(hightaxa, cates, lowertaxa) %>%
  summarise(nv = log(mean(nv)),
            meancates = log(mean(cvalue, na.rm = TRUE)),
            sdvpd = log(sd(cvalue, na.rm = TRUE)/sqrt(length(cvalue)))) %>%
  mutate(cates = case_when(cates == 'vpd' ~ 'Views per day',
                           cates == 'lpd' ~ 'Likes per day',
                           cates == 'dpd' ~ 'Dislikes per day',
                           TRUE ~ 'Comments per day'),
         cates = factor(cates, levels = c("Views per day", "Likes per day",
                                          "Comments per day")),
         taxa = case_when(hightaxa == 'Amphibians' ~ 'Amph',
                          TRUE ~ lowertaxa))

# Bar-chart version
ggplot(taxaplot, aes(x = reorder_within(lowertaxa, meancates, cates),
                     y = meancates, fill = cates)) +
  geom_bar(stat = 'identity') +
  coord_flip() +
  theme_bw() +
  scale_x_reordered() +
  facet_wrap(~cates, scales = 'free') +
  scale_fill_manual(values = c('grey20', 'grey50', 'grey80')) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.051))) +
  geom_errorbar(aes(ymin = meancates, ymax = meancates + sdvpd), width = 0.3) +
  theme(text = element_text(family = 'Cambria', size = 11),
        axis.text.y = element_text(hjust = 0),
        legend.position = 'None',
        #axis.text.x = element_text(angle = 45, hjust = 1),
        strip.text.y = element_text(size = 7)) +
  #geom_text(aes(label= nv, color=nv), size= 3.2, hjust = -7.2)+
  scale_color_gradient(low = 'grey30', high = 'grey5') +
  #scale_fill_viridis_d()+
  labs(y = 'Mean views, likes, and comments (±SE)',
       x = 'Lower taxonomic ranks')

# Dot-and-error-bar version
ggplot(taxaplot, aes(x = reorder_within(lowertaxa, meancates, cates),
                     y = meancates, fill = cates, colour = cates,
                     shape = cates)) +
  geom_point() +
  coord_flip() +
  theme_bw() + bg +
  scale_x_reordered() +
  facet_wrap(~cates, scales = 'free') +
  scale_y_continuous(expand = expansion(mult = c(0, 0.051))) +
  geom_errorbar(aes(ymin = meancates - sdvpd, ymax = meancates + sdvpd),
                width = 0.3) +
  theme(text = element_text(size = 13),
        axis.text.y = element_text(hjust = 0),
        legend.position = 'None') +
  scale_color_manual(values = c('darkblue', 'darkcyan', 'darkorange4')) +
  geom_hline(yintercept = 0, color = 'grey50', linetype = 'dotted') +
  labs(y = 'Logarithmic mean views, likes, and comments per day',
       x = 'Lower taxonomic ranks')

ggsave('reviews/subtaxa2.png', dpi = 1000, width = 10.4, height = 6.4,
       units = 'in')

#==============================================================================
# Question 1: Are governments harnessing the strength of YouTube in
# distributing information?
govdata <- ytube %>%
  group_by(category) %>%
  summarise(totvideo = length(vpd),
            meanvpd = mean(vpd),
            meanlikes = mean(lpd),
            meanun = mean(dpd),
            meancpd = mean(cpd, na.rm = TRUE),
            sdvpd = sd(vpd)/sqrt(length(vpd)),
            sdlikes = sd(lpd)/sqrt(length(vpd)),
            sdun = sd(dpd)/sqrt(length(vpd)),
            sdcpd = sd(cpd, na.rm = TRUE)/sqrt(length(cpd))) %>%
  mutate(meanvpd1 = paste(round(meanvpd, 2), '±', round(sdvpd, 2)),
         meanlike1 = paste(round(meanlikes, 2), '±', round(sdlikes, 2)),
         meanun1 = paste(round(meanun, 2), '±', round(sdun, 2)),
         meancpd1 = paste(round(meancpd, 2), '±', round(sdcpd, 2))) %>%
  dplyr::select(1, 2, 11:14)

xlsx::write.xlsx(govdata, 'government1.xlsx')

# Differences across channel categories (Kruskal-Wallis)
kruskal.test(vpd ~ category, ytube)
kruskal.test(lpd ~ category, ytube)
kruskal.test(dpd ~ category, ytube)
kruskal.test(cpd ~ category, ytube)

# Post-hoc Dunn tests of each engagement metric against channel category
library(rstatix)

govcol <- ytube %>% ungroup() %>% dplyr::select(cpd, vpd, lpd, dpd, category)

dtest <- list()
a <- colnames(govcol)
for (i in 1:4) {
  dtest[[i]] <- as.data.frame(
    dunn_test(govcol, as.formula(paste(a[i], a[5], sep = "~")),
              p.adjust.method = "BH"))
}
df <- do.call(cbind, dtest)
xlsx::write.xlsx(df, 'channelcategoriesdunntest.xlsx')

#==============================================================================
# Which words are used in the video titles?
titles <- iconv(ytube$title, to = "utf-8")
cclean <- sapply(titles, function(row) iconv(row, "latin1", "ASCII", sub = ""))

corpus <- Corpus(VectorSource(cclean))
corpus <- tm_map(corpus, tolower)
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
ccorpus <- tm_map(corpus, removeWords, stopwords('english'))

# Harmonise word variants
ccorpus <- tm_map(ccorpus, gsub, pattern = "chines", replacement = "china")
ccorpus <- tm_map(ccorpus, gsub, pattern = "africas", replacement = "africa")
ccorpus <- tm_map(ccorpus, gsub, pattern = "african", replacement = "africa")
ccorpus <- tm_map(ccorpus, gsub, pattern = "extinction", replacement = "extinct")
ccorpus <- tm_map(ccorpus, gsub, pattern = "invasions", replacement = "invasive")
ccorpus <- tm_map(ccorpus, gsub, pattern = "invasion", replacement = "invasive")
ccorpus <- tm_map(ccorpus, gsub, pattern = "invading", replacement = "invasive")
ccorpus <- tm_map(ccorpus, gsub, pattern = "polluted", replacement = "pollution")
ccorpus <- tm_map(ccorpus, gsub, pattern = "polluting", replacement = "pollution")
ccorpus <- tm_map(ccorpus, gsub, pattern = "pollutionrelated", replacement = "pollution")
#ccorpus<- tm_map(ccorpus, gsub, pattern="killing",replacement="kill")
#ccorpus <- tm_map(ccorpus, stemDocument)

# Remove URLs and excess whitespace
removeURL <- function(x) gsub('http[[:alnum:]]*', '', x)
ccorpus <- tm_map(ccorpus, content_transformer(removeURL))
ccorpus <- tm_map(ccorpus, stripWhitespace)

#==============================================================================
# Convert the unstructured corpus into a term-document matrix
tdm <- TermDocumentMatrix(ccorpus)
tdmatrix <- as.matrix(tdm)
words <- rowSums(tdmatrix)

# Keep words occurring at least five times, dropping uninformative terms
wfinal <- as.data.frame(subset(words, words >= 5)) %>%
  tibble::rownames_to_column("word") %>%
  filter(!word %in% c('africa', 'biodiversity', 'conservation', 'south'))
worddf <- setNames(wfinal, c('word', 'freq'))
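#------------------------------------------------------------------------------
# Optional inspection (not part of the original workflow): before plotting,
# the most frequent title terms can be listed directly from the term-document
# matrix. A minimal sketch using tm::findFreqTerms() and the counts computed
# above; the threshold of 10 is an arbitrary illustrative choice.
findFreqTerms(tdm, lowfreq = 10)            # terms appearing at least 10 times
head(sort(words, decreasing = TRUE), 10)    # top 10 terms with their counts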
set.seed(1144)

# Word cloud of frequent title terms (ggwordcloud version)
ggplot(worddf, aes(label = word, size = freq, color = freq)) +
  geom_text_wordcloud_area(family = "Calibri", shape = 'circle') +
  scale_size_area(max_size = 16) +
  theme_bw() +
  theme_minimal() +
  scale_colour_viridis_b(direction = 1) +
  theme(text = element_text(family = 'Helvetica', size = 11)) #+
  #ggtitle('Topical words')

ggsave('words.png', dpi = 600, width = 5.0, height = 4.3, units = 'in',
       bg = "white")

# Alternative word cloud (wordcloud package)
library(wordcloud)
set.seed(1234) # for reproducibility
wordcloud(words = worddf$word, freq = worddf$freq, min.freq = 1,
          max.words = 200, random.order = FALSE, rot.per = 0.65,
          colors = brewer.pal(8, "Dark2"))

#==============================================================================
# Linear model (lm): which video attributes are associated with views?
y <- lm(views ~ likes + unlikes + subscriber + ddiff + comments + Duration +
          title_len, data = ytube)

table <- as.data.frame(summary(y)$coefficients) %>%
  tibble::rownames_to_column("parameters") %>%
  setNames(c('parameter', 'estimate', 'stderror', 'tvalue', 'pvalue')) %>%
  ITNr::round_df(digits = 3)

xlsx::write.xlsx(table, 'glmoutput.xlsx')

#==============================================================================
# Origin of the channels (map of channel counts by country)
odata <- ytube %>% group_by(origin) %>% summarise(cts = length(origin))

orign <- admbound %>%
  dplyr::select(SOVEREIGNT) %>%
  left_join(y = odata %>% rename(SOVEREIGNT = origin), by = 'SOVEREIGNT') %>%
  filter(!SOVEREIGNT == 'Antarctica') %>%
  replace(., is.na(.), 0) #%>%
  #group_by(SOVEREIGNT) %>% slice(which.max(cts))

ggplot(orign) +
  geom_sf(aes(fill = cts)) +
  coord_sf(expand = FALSE) +
  scale_fill_viridis_c(direction = 1, option = 'B') +
  theme_bw() + bg +
  theme(legend.position.inside = c(0.1, 0.3), legend.position = 'inside') +
  #theme(text = element_text(family = 'Cambria', size = 11))+
  #suppressWarnings(annotation_scale(aes(location = "bl", width_hint = 0.3))) +
  #annotation_north_arrow(location = "bl", which_north = "true",
  #                       pad_x = unit(0.0, "in"), pad_y = unit(0.2, "in"),
  #                       style = north_arrow_fancy_orienteering)+
  labs(y = 'Latitude', x = 'Longitude', fill = 'Channels')

dc <- tmaptools::get_asp_ratio(orign)
ggsave('reviews/locations.png', dpi = 1000, width = dc * 3, height = 4.1,
       units = 'in')
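#------------------------------------------------------------------------------
# Optional diagnostics (not part of the original workflow) for the linear
# model `y` fitted above: residual plots and variance inflation factors flag
# non-normal residuals and collinearity among predictors (likes, comments and
# subscribers are often strongly correlated). A minimal sketch, assuming the
# car package is installed.
par(mfrow = c(2, 2)); plot(y); par(mfrow = c(1, 1))  # residual diagnostics
car::vif(y)                                          # variance inflation factors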