Reshape GET Request to Long Data Frame in R

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty { margin-bottom:0; }







up vote
2
down vote

favorite












Overview



Converting the content of an httr::GET() response from JSON into an R data frame required a few data-manipulation steps to reshape the data into long format before exporting it as a .csv file.



Syntax and Development Methodology



Please note that I am using Google's R Style Guide and I recently finished reading The Pragmatic Programmer.



Request



I know that I have not incorporated all of the authors' recommendations into this code. Any and all comments, suggestions, and advice are welcome and appreciated - especially those that come from this awesome book. Also, if there are any bad habits that you notice, please let me know. Thank you for your time and your help!



R code



# load necessary packages
library( httr )
library( jsonlite )

# Request the latest daily air-quality index for the London group.
# The endpoint is slow: the original author measured a wait of ~4 minutes.
get.response <- httr::GET(
  url = "http://api.erg.kcl.ac.uk/AirQuality/Daily/MonitoringIndex/Latest/GroupName=London/JSON"
)

# Stop right away on an HTTP error status instead of handing an error
# page to the JSON parser below.
httr::stop_for_status(get.response)

# Decode the response body into one character string. The encoding is
# stated explicitly so httr does not have to guess it.
get.request <- httr::content(
  x = get.response,
  as = "text",
  encoding = "UTF-8"
)

# Parse the JSON string into nested lists / data frames.
# flatten = TRUE asks jsonlite to flatten nested data frames.
get.request.list <- jsonlite::fromJSON(
  txt = get.request,
  flatten = TRUE
)

# Inspect the top-level fields of the response.
names(get.request.list$DailyAirQualityIndex)
# [1] "@MonitoringIndexDate" "@GroupName"
# [3] "@TimeToLive" "LocalAuthority"

# Response-level metadata, taken from the three "@"-prefixed fields at the
# top of the parsed response.
meta.data <- local({
  index <- get.request.list$DailyAirQualityIndex
  data.frame(
    MonitoringIndexDate = index[["@MonitoringIndexDate"]],
    GroupName = index[["@GroupName"]],
    TimeToLive = index[["@TimeToLive"]],
    stringsAsFactors = FALSE
  )
})

# Local-authority table with the metadata columns placed in front.
local.authority <- cbind(
  meta.data,
  get.request.list$DailyAirQualityIndex$LocalAuthority
)

# Remove the nested `Site` list-column from this table.
local.authority[["Site"]] <- NULL

# Sanity check on the shape.
dim(local.authority) # [1] 33 9

# Break the table into a list with one single-row data frame per row.
local.authority <- split(
  local.authority,
  f = seq_len(nrow(local.authority))
)

# Field names of the first local authority's site record.
# A local authority may have more than one site.
names(get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]])
# [1] "@BulletinDate" "@SiteCode" "@SiteName"
# [4] "@SiteType" "@Latitude" "@Longitude"
# [7] "@LatitudeWGS84" "@LongitudeWGS84" "Species"

# `Species` is itself a nested structure with its own field names.
names(get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]]$Species)
# [1] "@SpeciesCode" "@SpeciesDescription" "@AirQualityIndex"
# [4] "@AirQualityBand" "@IndexSource"

# Single-row, all-NA stand-in used wherever the site or species
# information of a list element is NULL: 13 columns in total.
placeholder.df <- data.frame(matrix(NA, nrow = 1L, ncol = 13L))

# Name the columns after the first 8 site fields followed by the species
# fields of get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]].
names(placeholder.df) <- c(
  names(get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]])[1:8],
  names(get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]]$Species)
)

# Collect the site-level columns (the first 8 fields) of every element of
# `Site` as a list of data frames.
#
# An element of `Site` is handled according to its shape:
#   * data frame with 9 or 13 columns -> keep columns 1:8 (a 13-column
#     frame also carries the species fields, which are not wanted here);
#   * plain list of length 9          -> bind fields 1:8 into a data frame;
#   * anything else (e.g. NULL)       -> first 8 columns of the all-NA
#     placeholder row.
site.list <- lapply(
  X = get.request.list$DailyAirQualityIndex$LocalAuthority$Site,
  FUN = function(i) {
    # `if` needs exactly one TRUE/FALSE. `&&` with is.data.frame()/is.list()
    # guarantees that; `class(i) == "..."` combined with `&` does not when
    # an object carries more than one class.
    if (is.data.frame(i) && length(i) %in% c(9L, 13L)) {
      i[1:8]
    } else if (is.list(i) && length(i) == 9L) {
      do.call(what = cbind.data.frame, args = i[1:8])
    } else {
      placeholder.df[1, 1:8]
    }
  }
)

# Collect the species information for every element of `Site`, giving a
# list that runs parallel to `Site`.
#
#   * data frame with 13 columns -> the species fields sit in columns 9:13;
#     take them and give them the placeholder's species column names;
#   * element of length 9        -> the species are the 9th field;
#   * anything else              -> all-NA placeholder species row.
#
# The 13-column case is recognised by its shape rather than by a fixed
# position in the list, so it still works when the response lists the
# elements in a different order or when several elements have that shape.
species.list <- lapply(
  X = get.request.list$DailyAirQualityIndex$LocalAuthority$Site,
  FUN = function(i) {
    if (is.data.frame(i) && length(i) == 13L) {
      flattened.species <- i[9:13]
      colnames(flattened.species) <- colnames(placeholder.df)[9:13]
      flattened.species
    } else if (length(i) == 9L) {
      i[[9]]
    } else {
      placeholder.df[1, 9:13]
    }
  }
)

# Positions of the elements of species.list that are NOT data frames.
# vapply() always returns a logical vector (also for an empty list), so
# which() never receives a value of the wrong type.
non.data.frame.condition <- which(
  vapply(
    X = species.list,
    FUN = function(i) !is.data.frame(i),
    FUN.VALUE = logical(1)
  )
)

# For those elements, walk their members: keep members that already are
# data frames and recast every other member as a data frame.
species.list[non.data.frame.condition] <- lapply(
  X = species.list[non.data.frame.condition],
  FUN = function(i) {
    lapply(
      X = i,
      FUN = function(j) {
        if (is.data.frame(j)) {
          j
        } else {
          cbind.data.frame(j)
        }
      }
    )
  }
)



# Combine the site columns with the species (AirQualityIndex) columns for
# every element. The result list is sized from site.list instead of a
# fixed count, so it follows whatever the response contained.
site.species.list <- vector(mode = "list", length = length(site.list))

# Positions where the site frame and the species object have the same
# number of rows, or where the site frame has exactly one row. This covers:
#   1. elements that represent a single site (real and placeholder data)
#   2. elements that have one species per site
# `||` is used because each test is a single TRUE/FALSE; isTRUE() keeps the
# second test a strict scalar.
single.site.or.one.site.per.species.condition <- which(
  vapply(
    X = seq_along(site.list),
    FUN = function(k) {
      identical(nrow(site.list[[k]]), nrow(species.list[[k]])) ||
        isTRUE(nrow(site.list[[k]]) == 1L)
    },
    FUN.VALUE = logical(1)
  )
)

# Every other position. Computed with setdiff() because negative indexing
# with an empty index vector (x[-integer(0)]) selects nothing instead of
# everything.
multi.site.index <- setdiff(
  seq_along(site.list),
  single.site.or.one.site.per.species.condition
)

# Elements that meet the condition: bind site and species side by side.
site.species.list[single.site.or.one.site.per.species.condition] <- mapply(
  FUN = function(i, j) cbind(i, j),
  site.list[single.site.or.one.site.per.species.condition],
  species.list[single.site.or.one.site.per.species.condition],
  SIMPLIFY = FALSE
)

# Remaining elements: split the site frame into single rows, bind each row
# to the matching member of the species list, then stack the pieces.
site.species.list[multi.site.index] <- mapply(
  FUN = function(i, j) {
    do.call(
      what = "rbind",
      args = Map(
        f = "cbind",
        split(x = i, f = seq_len(nrow(i))),
        j
      )
    )
  },
  site.list[multi.site.index],
  species.list[multi.site.index],
  SIMPLIFY = FALSE
)

# There were 50 or more warnings (use warnings() to see the first 50)
# warnings()
# Warning messages:
# 1: In data.frame(..., check.names = FALSE) :
# row names were found from a short variable and have been discarded
#

# Put each local authority's row in front of the matching element of
# site.species.list.
site.species.list <- Map(
  f = "cbind",
  local.authority,
  site.species.list
)

# The original author recorded 25 warnings from the call above, all of the
# form:
#   In data.frame(..., check.names = FALSE) :
#   row names were found from a short variable and have been discarded

# Give every data frame the same column names, taken from the second
# element of the list.
# NOTE(review): element 2 is the naming template; presumably it carries the
# full set of columns -- confirm.
# The function body holds two statements, so it must be wrapped in braces.
site.species.list <- lapply(
  X = site.species.list,
  FUN = function(i) {
    names(i) <- names(site.species.list[[2]])
    i
  }
)

# Stack the individual data frames of the list into one data frame.
# data.frame() runs with its default check.names = TRUE here, which is
# what turns the "@"-prefixed field names into names starting with "X.".
site.species.df <- data.frame(
  do.call(what = "rbind", args = site.species.list),
  stringsAsFactors = FALSE
)

# check dim
dim(site.species.df) # [1] 183 22

# Renumber the rows "1".."n". seq_len() also copes with a zero-row frame,
# where 1:nrow() would yield c(1, 0).
rownames(site.species.df) <- as.character(seq_len(nrow(site.species.df)))

# Remove the leading "X." from the column names. The pattern is anchored
# and the dot escaped so that only a literal "X." prefix is stripped.
colnames(site.species.df) <- sub(
  pattern = "^X\\.",
  replacement = "",
  x = colnames(site.species.df)
)

# Export the result as a CSV file named after today's date.
write.csv(
  x = site.species.df,
  row.names = FALSE,
  file = paste0(Sys.Date(), "-London_Air_Quality.csv")
)

# end of script #






share|improve this question

















  • 1




    hey @aspiringurbandatascientist, are you still looking for a review for this?
    – Chris
    Jun 7 at 3:33










  • @Chris - while looking at this code definitely made me cringe, I would love another pair of eyes to look this over. I might even post an answer myself after I've learned the tidyverse and other helpful coding mechanics via courses on DataCamp. Thank you for taking the time to offer feedback!
    – Cristian E. Nuno
    Jun 7 at 14:06

















up vote
2
down vote

favorite












Overview



Converting the content of an httr::GET() response from JSON into an R data frame required a few data-manipulation steps to reshape the data into long format before exporting it as a .csv file.



Syntax and Development Methodology



Please note that I am using Google's R Style Guide and I recently finished reading The Pragmatic Programmer.



Request



I know that I have not incorporated all of the authors' recommendations into this code. Any and all comments, suggestions, and advice are welcome and appreciated - especially those that come from this awesome book. Also, if there are any bad habits that you notice, please let me know. Thank you for your time and your help!



R code



# load necessary packages
library( httr )
library( jsonlite )

# GET a url
# This is taking awhile
# wait time is ~4 minutes
get.request <- httr::GET( url = "http://api.erg.kcl.ac.uk/AirQuality/Daily/MonitoringIndex/Latest/GroupName=London/JSON" )

# transfrom get.request
# into character string
get.request <- rawToChar( x = get.request$content )

# Transfrom JSON string
# into data frame
get.request.list <-
fromJSON( txt = get.request
, flatten = TRUE
)
# get names of objects
names( get.request.list$DailyAirQualityIndex )
# [1] "@MonitoringIndexDate" "@GroupName"
# [3] "@TimeToLive" "LocalAuthority"

# store meta data
meta.data <-
data.frame(
MonitoringIndexDate = get.request.list$DailyAirQualityIndex$`@MonitoringIndexDate`
, GroupName = get.request.list$DailyAirQualityIndex$`@GroupName`
, TimeToLive = get.request.list$DailyAirQualityIndex$`@TimeToLive`
, stringsAsFactors = FALSE
)

# store the local authority data
local.authority <-
get.request.list$DailyAirQualityIndex$LocalAuthority

# add meta data
# onto local.authority.df
local.authority <-
cbind(
meta.data
, local.authority
)

# delete the list within
# this data frame
local.authority$Site <- NULL

# check dim
dim( local.authority ) # [1] 33 9

# transform each row
# into a list
local.authority <-
split(
x = local.authority
, f = seq_len(
length.out = nrow( local.authority )
)
)

# know relevant column names
# Each local authority may have more than one site
names( get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]] )
# [1] "@BulletinDate" "@SiteCode" "@SiteName"
# [4] "@SiteType" "@Latitude" "@Longitude"
# [7] "@LatitudeWGS84" "@LongitudeWGS84" "Species"

# wait, it looks like there's another list
# within this list
names(
get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]]$Species
)
# [1] "@SpeciesCode" "@SpeciesDescription" "@AirQualityIndex"
# [4] "@AirQualityBand" "@IndexSource"

# placeholder data
# will be placed where there
# are NULL elements
# in the list objects for
# the site and species information
placeholder.df <-
data.frame(
a = NA
, b = NA
, c = NA
, d = NA
, e = NA
, f = NA
, g = NA
, h = NA
, i = NA
, j = NA
, k = NA
, m = NA
, n = NA
)

# set the column names to match
# those in
# get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]]
placeholder.df <-
setNames(
object = placeholder.df
, nm = c(
names( get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]] )[ 1:8 ]
, names( get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]]$Species )
)
)

# store the site information
# in a list
# note: one list accidently contains the species information
# in it.
site.list <-
lapply( X = get.request.list$DailyAirQualityIndex$LocalAuthority$Site
, FUN = function( i )
if( length( i ) == 9 & class( i ) == "data.frame" )
i[ 1:8 ]
else if( length( i ) == 13 & class( i ) == "data.frame" )
i[ 1:8 ]
else if( length( i ) == 9 & class( i ) == "list" )
do.call(
what = cbind.data.frame
, args = i[ 1:8 ]
)
else
placeholder.df[ 1, ][ 1:8 ]

)

# store the species information for that
# one outlier list
accidential.species <-
get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[24]][ 9:13 ]

colnames( accidential.species ) <-
colnames( placeholder.df )[ 9:13 ]

# store the species found
# at each site
species.list <-
lapply( X = get.request.list$DailyAirQualityIndex$LocalAuthority$Site
, FUN = function( i )
if( length( i ) == 9 )
i[[ 9 ]]
else
placeholder.df[ 1, ][ 9:13 ]

)

# insert the accidential.species information
species.list[[24]] <-
accidential.species

# create a condition
# that indicates TRUE when an object
# in species.list is NOT a data frame
non.data.frame.condition <-
which(
sapply( X = species.list
, FUN = function( i ) !is.data.frame( i )
)
)

# use the condition to test
# if each object in the list
# is a data frame; if not,
# recast it as one.
species.list[ non.data.frame.condition ] <-
lapply(
X = species.list[ non.data.frame.condition ]
, FUN = function( i )
lapply(
X = i
, FUN = function( j )
if( is.data.frame( j ) == TRUE )
j
else
cbind.data.frame( j )


)
)



# now its time to
# retrieve AirQualityIndex information
# for each site, where each site
# may measure more than zero or more than one type of specices
site.species.list <-
vector( mode = "list", length = 33 )

# create condition
# that identifies the indices
# of objects in both site.list and species.list
# that containt the same number of rows
# this catches two characteristics:
# 1. objects that represent a single site (real and placeholder data)
# 2. objects that have 1 species per site
single.site.or.one.site.per.species.condition <-
which(
mapply( FUN = function( x, y)
identical( nrow( x ), nrow( y ) ) |
nrow( x ) == 1
, site.list
, species.list
, SIMPLIFY = TRUE
)
)

# cbind the two lists
# that meet the single.site.or.one.site.per.species.condition
site.species.list[ single.site.or.one.site.per.species.condition ] <-
mapply(
FUN = function( i, j )
cbind( i, j)
, site.list[ single.site.or.one.site.per.species.condition ]
, species.list[ single.site.or.one.site.per.species.condition ]
, SIMPLIFY = FALSE
)

# now, iteratively add rows & columns to
# each data frame in site.list
# based on species.list
# that do not meet the single.site.or.one.site.per.species.condition
site.species.list[ -single.site.or.one.site.per.species.condition ] <-
mapply( FUN = function( i, j )
do.call(
what = "rbind"
, args = Map(
f = "cbind"
, split(
x = i
, f = seq_len( length.out = nrow( i ) )
)
, j
)
)
, site.list[ -single.site.or.one.site.per.species.condition ]
, species.list[ -single.site.or.one.site.per.species.condition ]
, SIMPLIFY = FALSE
)

# There were 50 or more warnings (use warnings() to see the first 50)
# warnings()
# Warning messages:
# 1: In data.frame(..., check.names = FALSE) :
# row names were found from a short variable and have been discarded
#

# add the local authority
# information onto each object
# within site.species.list
site.species.list <-
Map(
f = "cbind"
, local.authority
, site.species.list
)

# There were 25 warnings (use warnings() to see them)
# warnings()
# Warning messages:
# 1: In data.frame(..., check.names = FALSE) :
# row names were found from a short variable and have been discarded

# Give every data frame the same column names, taken from the second
# element of the list.
# The function body holds two statements, so it must be wrapped in braces.
site.species.list <- lapply(
  X = site.species.list,
  FUN = function(i) {
    names(i) <- names(site.species.list[[2]])
    i
  }
)

# Stack the individual data frames of the list into one data frame.
# data.frame() runs with its default check.names = TRUE here, which is
# what turns the "@"-prefixed field names into names starting with "X.".
site.species.df <- data.frame(
  do.call(what = "rbind", args = site.species.list),
  stringsAsFactors = FALSE
)

# check dim
dim(site.species.df) # [1] 183 22

# Renumber the rows "1".."n". seq_len() also copes with a zero-row frame,
# where 1:nrow() would yield c(1, 0).
rownames(site.species.df) <- as.character(seq_len(nrow(site.species.df)))

# Remove the leading "X." from the column names. The pattern is anchored
# and the dot escaped so that only a literal "X." prefix is stripped.
colnames(site.species.df) <- sub(
  pattern = "^X\\.",
  replacement = "",
  x = colnames(site.species.df)
)

# Export the result as a CSV file named after today's date.
write.csv(
  x = site.species.df,
  row.names = FALSE,
  file = paste0(Sys.Date(), "-London_Air_Quality.csv")
)

# end of script #






share|improve this question

















  • 1




    hey @aspiringurbandatascientist, are you still looking for a review for this?
    – Chris
    Jun 7 at 3:33










  • @Chris - while looking at this code definitely made me cringe, I would love another pair of eyes to look this over. I might even post an answer myself after I've learned the tidyverse and other helpful coding mechanics via courses on DataCamp. Thank you for taking the time to offer feedback!
    – Cristian E. Nuno
    Jun 7 at 14:06













up vote
2
down vote

favorite









up vote
2
down vote

favorite











Overview



Converting the content of an httr::GET() response from JSON into an R data frame required a few data-manipulation steps to reshape the data into long format before exporting it as a .csv file.



Syntax and Development Methodology



Please note that I am using Google's R Style Guide and I recently finished reading The Pragmatic Programmer.



Request



I know that I have not incorporated all of the authors' recommendations into this code. Any and all comments, suggestions, and advice are welcome and appreciated - especially those that come from this awesome book. Also, if there are any bad habits that you notice, please let me know. Thank you for your time and your help!



R code



# load necessary packages
library( httr )
library( jsonlite )

# GET a url
# This is taking awhile
# wait time is ~4 minutes
get.request <- httr::GET( url = "http://api.erg.kcl.ac.uk/AirQuality/Daily/MonitoringIndex/Latest/GroupName=London/JSON" )

# transfrom get.request
# into character string
get.request <- rawToChar( x = get.request$content )

# Transfrom JSON string
# into data frame
get.request.list <-
fromJSON( txt = get.request
, flatten = TRUE
)
# get names of objects
names( get.request.list$DailyAirQualityIndex )
# [1] "@MonitoringIndexDate" "@GroupName"
# [3] "@TimeToLive" "LocalAuthority"

# store meta data
meta.data <-
data.frame(
MonitoringIndexDate = get.request.list$DailyAirQualityIndex$`@MonitoringIndexDate`
, GroupName = get.request.list$DailyAirQualityIndex$`@GroupName`
, TimeToLive = get.request.list$DailyAirQualityIndex$`@TimeToLive`
, stringsAsFactors = FALSE
)

# store the local authority data
local.authority <-
get.request.list$DailyAirQualityIndex$LocalAuthority

# add meta data
# onto local.authority.df
local.authority <-
cbind(
meta.data
, local.authority
)

# delete the list within
# this data frame
local.authority$Site <- NULL

# check dim
dim( local.authority ) # [1] 33 9

# transform each row
# into a list
local.authority <-
split(
x = local.authority
, f = seq_len(
length.out = nrow( local.authority )
)
)

# know relevant column names
# Each local authority may have more than one site
names( get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]] )
# [1] "@BulletinDate" "@SiteCode" "@SiteName"
# [4] "@SiteType" "@Latitude" "@Longitude"
# [7] "@LatitudeWGS84" "@LongitudeWGS84" "Species"

# wait, it looks like there's another list
# within this list
names(
get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]]$Species
)
# [1] "@SpeciesCode" "@SpeciesDescription" "@AirQualityIndex"
# [4] "@AirQualityBand" "@IndexSource"

# placeholder data
# will be placed where there
# are NULL elements
# in the list objects for
# the site and species information
placeholder.df <-
data.frame(
a = NA
, b = NA
, c = NA
, d = NA
, e = NA
, f = NA
, g = NA
, h = NA
, i = NA
, j = NA
, k = NA
, m = NA
, n = NA
)

# set the column names to match
# those in
# get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]]
placeholder.df <-
setNames(
object = placeholder.df
, nm = c(
names( get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]] )[ 1:8 ]
, names( get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]]$Species )
)
)

# store the site information
# in a list
# note: one list accidently contains the species information
# in it.
site.list <-
lapply( X = get.request.list$DailyAirQualityIndex$LocalAuthority$Site
, FUN = function( i )
if( length( i ) == 9 & class( i ) == "data.frame" )
i[ 1:8 ]
else if( length( i ) == 13 & class( i ) == "data.frame" )
i[ 1:8 ]
else if( length( i ) == 9 & class( i ) == "list" )
do.call(
what = cbind.data.frame
, args = i[ 1:8 ]
)
else
placeholder.df[ 1, ][ 1:8 ]

)

# store the species information for that
# one outlier list
accidential.species <-
get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[24]][ 9:13 ]

colnames( accidential.species ) <-
colnames( placeholder.df )[ 9:13 ]

# store the species found
# at each site
species.list <-
lapply( X = get.request.list$DailyAirQualityIndex$LocalAuthority$Site
, FUN = function( i )
if( length( i ) == 9 )
i[[ 9 ]]
else
placeholder.df[ 1, ][ 9:13 ]

)

# insert the accidential.species information
species.list[[24]] <-
accidential.species

# create a condition
# that indicates TRUE when an object
# in species.list is NOT a data frame
non.data.frame.condition <-
which(
sapply( X = species.list
, FUN = function( i ) !is.data.frame( i )
)
)

# use the condition to test
# if each object in the list
# is a data frame; if not,
# recast it as one.
species.list[ non.data.frame.condition ] <-
lapply(
X = species.list[ non.data.frame.condition ]
, FUN = function( i )
lapply(
X = i
, FUN = function( j )
if( is.data.frame( j ) == TRUE )
j
else
cbind.data.frame( j )


)
)



# now its time to
# retrieve AirQualityIndex information
# for each site, where each site
# may measure more than zero or more than one type of specices
site.species.list <-
vector( mode = "list", length = 33 )

# create condition
# that identifies the indices
# of objects in both site.list and species.list
# that containt the same number of rows
# this catches two characteristics:
# 1. objects that represent a single site (real and placeholder data)
# 2. objects that have 1 species per site
single.site.or.one.site.per.species.condition <-
which(
mapply( FUN = function( x, y)
identical( nrow( x ), nrow( y ) ) |
nrow( x ) == 1
, site.list
, species.list
, SIMPLIFY = TRUE
)
)

# cbind the two lists
# that meet the single.site.or.one.site.per.species.condition
site.species.list[ single.site.or.one.site.per.species.condition ] <-
mapply(
FUN = function( i, j )
cbind( i, j)
, site.list[ single.site.or.one.site.per.species.condition ]
, species.list[ single.site.or.one.site.per.species.condition ]
, SIMPLIFY = FALSE
)

# now, iteratively add rows & columns to
# each data frame in site.list
# based on species.list
# that do not meet the single.site.or.one.site.per.species.condition
site.species.list[ -single.site.or.one.site.per.species.condition ] <-
mapply( FUN = function( i, j )
do.call(
what = "rbind"
, args = Map(
f = "cbind"
, split(
x = i
, f = seq_len( length.out = nrow( i ) )
)
, j
)
)
, site.list[ -single.site.or.one.site.per.species.condition ]
, species.list[ -single.site.or.one.site.per.species.condition ]
, SIMPLIFY = FALSE
)

# There were 50 or more warnings (use warnings() to see the first 50)
# warnings()
# Warning messages:
# 1: In data.frame(..., check.names = FALSE) :
# row names were found from a short variable and have been discarded
#

# add the local authority
# information onto each object
# within site.species.list
site.species.list <-
Map(
f = "cbind"
, local.authority
, site.species.list
)

# There were 25 warnings (use warnings() to see them)
# warnings()
# Warning messages:
# 1: In data.frame(..., check.names = FALSE) :
# row names were found from a short variable and have been discarded

# Give every data frame the same column names, taken from the second
# element of the list.
# The function body holds two statements, so it must be wrapped in braces.
site.species.list <- lapply(
  X = site.species.list,
  FUN = function(i) {
    names(i) <- names(site.species.list[[2]])
    i
  }
)

# Stack the individual data frames of the list into one data frame.
# data.frame() runs with its default check.names = TRUE here, which is
# what turns the "@"-prefixed field names into names starting with "X.".
site.species.df <- data.frame(
  do.call(what = "rbind", args = site.species.list),
  stringsAsFactors = FALSE
)

# check dim
dim(site.species.df) # [1] 183 22

# Renumber the rows "1".."n". seq_len() also copes with a zero-row frame,
# where 1:nrow() would yield c(1, 0).
rownames(site.species.df) <- as.character(seq_len(nrow(site.species.df)))

# Remove the leading "X." from the column names. The pattern is anchored
# and the dot escaped so that only a literal "X." prefix is stripped.
colnames(site.species.df) <- sub(
  pattern = "^X\\.",
  replacement = "",
  x = colnames(site.species.df)
)

# Export the result as a CSV file named after today's date.
write.csv(
  x = site.species.df,
  row.names = FALSE,
  file = paste0(Sys.Date(), "-London_Air_Quality.csv")
)

# end of script #






share|improve this question













Overview



Converting the content of an httr::GET() response from JSON into an R data frame required a few data-manipulation steps to reshape the data into long format before exporting it as a .csv file.



Syntax and Development Methodology



Please note that I am using Google's R Style Guide and I recently finished reading The Pragmatic Programmer.



Request



I know that I have not incorporated all of the authors' recommendations into this code. Any and all comments, suggestions, and advice are welcome and appreciated - especially those that come from this awesome book. Also, if there are any bad habits that you notice, please let me know. Thank you for your time and your help!



R code



# load necessary packages
library( httr )
library( jsonlite )

# GET a url
# This is taking awhile
# wait time is ~4 minutes
get.request <- httr::GET( url = "http://api.erg.kcl.ac.uk/AirQuality/Daily/MonitoringIndex/Latest/GroupName=London/JSON" )

# transfrom get.request
# into character string
get.request <- rawToChar( x = get.request$content )

# Transfrom JSON string
# into data frame
get.request.list <-
fromJSON( txt = get.request
, flatten = TRUE
)
# get names of objects
names( get.request.list$DailyAirQualityIndex )
# [1] "@MonitoringIndexDate" "@GroupName"
# [3] "@TimeToLive" "LocalAuthority"

# store meta data
meta.data <-
data.frame(
MonitoringIndexDate = get.request.list$DailyAirQualityIndex$`@MonitoringIndexDate`
, GroupName = get.request.list$DailyAirQualityIndex$`@GroupName`
, TimeToLive = get.request.list$DailyAirQualityIndex$`@TimeToLive`
, stringsAsFactors = FALSE
)

# store the local authority data
local.authority <-
get.request.list$DailyAirQualityIndex$LocalAuthority

# add meta data
# onto local.authority.df
local.authority <-
cbind(
meta.data
, local.authority
)

# delete the list within
# this data frame
local.authority$Site <- NULL

# check dim
dim( local.authority ) # [1] 33 9

# transform each row
# into a list
local.authority <-
split(
x = local.authority
, f = seq_len(
length.out = nrow( local.authority )
)
)

# know relevant column names
# Each local authority may have more than one site
names( get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]] )
# [1] "@BulletinDate" "@SiteCode" "@SiteName"
# [4] "@SiteType" "@Latitude" "@Longitude"
# [7] "@LatitudeWGS84" "@LongitudeWGS84" "Species"

# wait, it looks like there's another list
# within this list
names(
get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]]$Species
)
# [1] "@SpeciesCode" "@SpeciesDescription" "@AirQualityIndex"
# [4] "@AirQualityBand" "@IndexSource"

# placeholder data
# will be placed where there
# are NULL elements
# in the list objects for
# the site and species information
placeholder.df <-
data.frame(
a = NA
, b = NA
, c = NA
, d = NA
, e = NA
, f = NA
, g = NA
, h = NA
, i = NA
, j = NA
, k = NA
, m = NA
, n = NA
)

# set the column names to match
# those in
# get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]]
placeholder.df <-
setNames(
object = placeholder.df
, nm = c(
names( get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]] )[ 1:8 ]
, names( get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[1]]$Species )
)
)

# store the site information
# in a list
# note: one list accidently contains the species information
# in it.
site.list <-
lapply( X = get.request.list$DailyAirQualityIndex$LocalAuthority$Site
, FUN = function( i )
if( length( i ) == 9 & class( i ) == "data.frame" )
i[ 1:8 ]
else if( length( i ) == 13 & class( i ) == "data.frame" )
i[ 1:8 ]
else if( length( i ) == 9 & class( i ) == "list" )
do.call(
what = cbind.data.frame
, args = i[ 1:8 ]
)
else
placeholder.df[ 1, ][ 1:8 ]

)

# store the species information for that
# one outlier list
accidential.species <-
get.request.list$DailyAirQualityIndex$LocalAuthority$Site[[24]][ 9:13 ]

colnames( accidential.species ) <-
colnames( placeholder.df )[ 9:13 ]

# store the species found
# at each site
species.list <-
lapply( X = get.request.list$DailyAirQualityIndex$LocalAuthority$Site
, FUN = function( i )
if( length( i ) == 9 )
i[[ 9 ]]
else
placeholder.df[ 1, ][ 9:13 ]

)

# insert the accidential.species information
species.list[[24]] <-
accidential.species

# create a condition
# that indicates TRUE when an object
# in species.list is NOT a data frame
non.data.frame.condition <-
which(
sapply( X = species.list
, FUN = function( i ) !is.data.frame( i )
)
)

# use the condition to test
# if each object in the list
# is a data frame; if not,
# recast it as one.
species.list[ non.data.frame.condition ] <-
lapply(
X = species.list[ non.data.frame.condition ]
, FUN = function( i )
lapply(
X = i
, FUN = function( j )
if( is.data.frame( j ) == TRUE )
j
else
cbind.data.frame( j )


)
)



# now its time to
# retrieve AirQualityIndex information
# for each site, where each site
# may measure more than zero or more than one type of specices
site.species.list <-
vector( mode = "list", length = 33 )

# create condition
# that identifies the indices
# of objects in both site.list and species.list
# that containt the same number of rows
# this catches two characteristics:
# 1. objects that represent a single site (real and placeholder data)
# 2. objects that have 1 species per site
single.site.or.one.site.per.species.condition <-
which(
mapply( FUN = function( x, y)
identical( nrow( x ), nrow( y ) ) |
nrow( x ) == 1
, site.list
, species.list
, SIMPLIFY = TRUE
)
)

# cbind the two lists
# that meet the single.site.or.one.site.per.species.condition
site.species.list[ single.site.or.one.site.per.species.condition ] <-
mapply(
FUN = function( i, j )
cbind( i, j)
, site.list[ single.site.or.one.site.per.species.condition ]
, species.list[ single.site.or.one.site.per.species.condition ]
, SIMPLIFY = FALSE
)

# now, iteratively add rows & columns to
# each data frame in site.list
# based on species.list
# that do not meet the single.site.or.one.site.per.species.condition
site.species.list[ -single.site.or.one.site.per.species.condition ] <-
mapply( FUN = function( i, j )
do.call(
what = "rbind"
, args = Map(
f = "cbind"
, split(
x = i
, f = seq_len( length.out = nrow( i ) )
)
, j
)
)
, site.list[ -single.site.or.one.site.per.species.condition ]
, species.list[ -single.site.or.one.site.per.species.condition ]
, SIMPLIFY = FALSE
)

# There were 50 or more warnings (use warnings() to see the first 50)
# warnings()
# Warning messages:
# 1: In data.frame(..., check.names = FALSE) :
# row names were found from a short variable and have been discarded
#

# add the local authority
# information onto each object
# within site.species.list
site.species.list <-
Map(
f = "cbind"
, local.authority
, site.species.list
)

# There were 25 warnings (use warnings() to see them)
# warnings()
# Warning messages:
# 1: In data.frame(..., check.names = FALSE) :
# row names were found from a short variable and have been discarded

# Give every data frame the same column names, taken from the second
# element of the list.
# The function body holds two statements, so it must be wrapped in braces.
site.species.list <- lapply(
  X = site.species.list,
  FUN = function(i) {
    names(i) <- names(site.species.list[[2]])
    i
  }
)

# Stack the individual data frames of the list into one data frame.
# data.frame() runs with its default check.names = TRUE here, which is
# what turns the "@"-prefixed field names into names starting with "X.".
site.species.df <- data.frame(
  do.call(what = "rbind", args = site.species.list),
  stringsAsFactors = FALSE
)

# check dim
dim(site.species.df) # [1] 183 22

# Renumber the rows "1".."n". seq_len() also copes with a zero-row frame,
# where 1:nrow() would yield c(1, 0).
rownames(site.species.df) <- as.character(seq_len(nrow(site.species.df)))

# Remove the leading "X." from the column names. The pattern is anchored
# and the dot escaped so that only a literal "X." prefix is stripped.
colnames(site.species.df) <- sub(
  pattern = "^X\\.",
  replacement = "",
  x = colnames(site.species.df)
)

# Export the result as a CSV file named after today's date.
write.csv(
  x = site.species.df,
  row.names = FALSE,
  file = paste0(Sys.Date(), "-London_Air_Quality.csv")
)

# end of script #








share|improve this question












share|improve this question




share|improve this question








edited Feb 26 at 19:37
























asked Feb 26 at 0:48









Cristian E. Nuno

1115




1115







  • 1




    hey @aspiringurbandatascientist, are you still looking for a review for this?
    – Chris
    Jun 7 at 3:33










  • @Chris - while looking at this code definitely made me cringe, I would love another pair of eyes to look this over. I might even post an answer myself after I've learned the tidyverse and other helpful coding mechanics via courses on DataCamp. Thank you for taking the time to offer feedback!
    – Cristian E. Nuno
    Jun 7 at 14:06













  • 1




    hey @aspiringurbandatascientist, are you still looking for a review for this?
    – Chris
    Jun 7 at 3:33










  • @Chris - while looking at this code definitely made me cringe, I would love another pair of eyes to look this over. I might even post an answer myself after I've learned the tidyverse and other helpful coding mechanics via courses on DataCamp. Thank you for taking the time to offer feedback!
    – Cristian E. Nuno
    Jun 7 at 14:06








1




1




hey @aspiringurbandatascientist, are you still looking for a review for this?
– Chris
Jun 7 at 3:33




hey @aspiringurbandatascientist, are you still looking for a review for this?
– Chris
Jun 7 at 3:33












@Chris - while looking at this code definitely made me cringe, I would love another pair of eyes to look this over. I might even post an answer myself after I've learned the tidyverse and other helpful coding mechanics via courses on DataCamp. Thank you for taking the time to offer feedback!
– Cristian E. Nuno
Jun 7 at 14:06





@Chris - while looking at this code definitely made me cringe, I would love another pair of eyes to look this over. I might even post an answer myself after I've learned the tidyverse and other helpful coding mechanics via courses on DataCamp. Thank you for taking the time to offer feedback!
– Cristian E. Nuno
Jun 7 at 14:06
















active

oldest

votes











Your Answer




StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);



);








 

draft saved


draft discarded


















StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f188347%2freshape-get-request-to-long-data-frame-in-r%23new-answer', 'question_page');

);

Post as a guest



































active

oldest

votes













active

oldest

votes









active

oldest

votes






active

oldest

votes










 

draft saved


draft discarded


























 


draft saved


draft discarded














StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f188347%2freshape-get-request-to-long-data-frame-in-r%23new-answer', 'question_page');

);

Post as a guest













































































Popular posts from this blog

Greedy Best First Search implementation in Rust

Function to Return a JSON Like Objects Using VBA Collections and Arrays

C++11 CLH Lock Implementation