pattern matching, replacement and for loop optimization in R

.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;

up vote
0
down vote

favorite

0
down vote
favorite

I have two data frames, loc_df and city_df. loc_df has 5 columns, but I am considering only 2 here (Organization.Location.1 and Organization.Location.2), with 35000 rows; city_df has 2 columns (City and Country) with 1000 rows. I take one value at a time from the City column and match it against the organization columns using grepl (for text matching) inside a for loop (for iteration). I also have to maintain an index, which is why I am using a for loop. But this takes a huge amount of time.

I am trying to replace each city, state, or province name with its country name in the organization columns.

Please help me to optimize this code. I am very new to R.

for(k in 1:2)
 if(k==1)

 for (i in 1:nrow(city_df)) 
 x1 <- paste(" ", city_df$City[i], sep = "")
 x2 <- paste(" ", city_df$City[i], " ", sep = "")
 x3 <- paste(city_df$City[i], " ", sep = "")
 # print(x1)

 for (j in 1:nrow(loc_df)) 
 #print(loc_df$Organization.Location.1[j])


 if (grepl(x1, loc_df$Organization.Location.1[j]) 
 
 
 if(k==2)

 for (i in 1:nrow(city_df)) 
 x1 <- paste(" ", city_df$City[i], sep = "")
 x2 <- paste(" ", city_df$City[i], " ", sep = "")
 x3 <- paste(city_df$City[i], " ", sep = "")


 for (j in 1:nrow(loc_df)) 
 grepl(x3, loc_df$Organization.Location.3[j])) 
 loc_df$org_new1[j] <- city_df$Country[i]
 break

This is sample data for city_df, generated using dput:

structure(list(City = c("zug", "canton of zug", "zimbabwe", 
 "zigong chengdu", "zhuhai guangdong china", "zaragoza spain"), Country = c("switzerland", 
 "switzerland", "zimbabwe", "china", "china", "spain"
 )), .Names = c("City", "Country"), row.names = c(NA, 6L), class = "data.frame")

sample of loc_df

structure(list(Organization.Location.1 = c("zug switzerland", 
"zug canton of zug switzerland", "zimbabwe", "zigong chengdu pr china", 
"zhuhai guangdong china", "zaragoza spain"), Organization.Location.2 = c("", 
"san francisco bay area", "london canada area", "beijing city china", 
"greater atlanta area", "paris area france")), .Names = c("Organization.Location.1", 
"Organization.Location.2"), row.names = c(NA, 6L), class = "data.frame")

edited Mar 22 at 12:41

asked Mar 22 at 11:58

Girijesh Singh

You have not supplied a case in your data where there is a match.
– minem
Mar 22 at 12:15

@minem This line of code, loc_df$org_new1[j] <- city_df$Country[i], supplies the data when there is a match, and it is present in the code above too.
– Girijesh Singh
Mar 22 at 12:32

Your example data does not include a valid case in which the conditions are met.
– minem
Mar 22 at 12:34

@minem Sorry, sir; I have updated the question now.
– Girijesh Singh
Mar 22 at 12:41

add a comment |

up vote
0
down vote

favorite

0
down vote
favorite

I am trying to replace each city, state, or province name with its country name in the organization columns.

Please help me to optimize this code. I am very new to R.

for(k in 1:2)
 if(k==1)

 for (i in 1:nrow(city_df)) 
 x1 <- paste(" ", city_df$City[i], sep = "")
 x2 <- paste(" ", city_df$City[i], " ", sep = "")
 x3 <- paste(city_df$City[i], " ", sep = "")
 # print(x1)

 for (j in 1:nrow(loc_df)) 
 #print(loc_df$Organization.Location.1[j])


 if (grepl(x1, loc_df$Organization.Location.1[j]) 
 
 
 if(k==2)

 for (i in 1:nrow(city_df)) 
 x1 <- paste(" ", city_df$City[i], sep = "")
 x2 <- paste(" ", city_df$City[i], " ", sep = "")
 x3 <- paste(city_df$City[i], " ", sep = "")


 for (j in 1:nrow(loc_df)) 
 grepl(x3, loc_df$Organization.Location.3[j])) 
 loc_df$org_new1[j] <- city_df$Country[i]
 break

this is sample data I have generated using dput of city_df

structure(list(City = c("zug", "canton of zug", "zimbabwe", 
 "zigong chengdu", "zhuhai guangdong china", "zaragoza spain"), Country = c("switzerland", 
 "switzerland", "zimbabwe", "china", "china", "spain"
 )), .Names = c("City", "Country"), row.names = c(NA, 6L), class = "data.frame")

sample of loc_df

structure(list(Organization.Location.1 = c("zug switzerland", 
"zug canton of zug switzerland", "zimbabwe", "zigong chengdu pr china", 
"zhuhai guangdong china", "zaragoza spain"), Organization.Location.2 = c("", 
"san francisco bay area", "london canada area", "beijing city china", 
"greater atlanta area", "paris area france")), .Names = c("Organization.Location.1", 
"Organization.Location.2"), row.names = c(NA, 6L), class = "data.frame")

edited Mar 22 at 12:41

asked Mar 22 at 11:58

Girijesh Singh

you have not supplied case in your data when there is a match
â€“Â minem
Mar 22 at 12:15

@minem loc_df$org_new1[j] <- city_df$Country[i] this line of code is supplying data when there is a match, And it present in above code too
â€“Â Girijesh Singh
Mar 22 at 12:32

You have not supplied in your example data a valid example when the conditiona are met
â€“Â minem
Mar 22 at 12:34

@minem sorry sir, I have updated the question now.
â€“Â Girijesh Singh
Mar 22 at 12:41

add a commentÂ |Â

up vote
0
down vote

favorite

0
down vote
favorite

I am trying to replace each city, state, or province name with its country name in the organization columns.

Please help me to optimize this code. I am very new to R.

for(k in 1:2)
 if(k==1)

 for (i in 1:nrow(city_df)) 
 x1 <- paste(" ", city_df$City[i], sep = "")
 x2 <- paste(" ", city_df$City[i], " ", sep = "")
 x3 <- paste(city_df$City[i], " ", sep = "")
 # print(x1)

 for (j in 1:nrow(loc_df)) 
 #print(loc_df$Organization.Location.1[j])


 if (grepl(x1, loc_df$Organization.Location.1[j]) 
 
 
 if(k==2)

 for (i in 1:nrow(city_df)) 
 x1 <- paste(" ", city_df$City[i], sep = "")
 x2 <- paste(" ", city_df$City[i], " ", sep = "")
 x3 <- paste(city_df$City[i], " ", sep = "")


 for (j in 1:nrow(loc_df)) 
 grepl(x3, loc_df$Organization.Location.3[j])) 
 loc_df$org_new1[j] <- city_df$Country[i]
 break

this is sample data I have generated using dput of city_df

structure(list(City = c("zug", "canton of zug", "zimbabwe", 
 "zigong chengdu", "zhuhai guangdong china", "zaragoza spain"), Country = c("switzerland", 
 "switzerland", "zimbabwe", "china", "china", "spain"
 )), .Names = c("City", "Country"), row.names = c(NA, 6L), class = "data.frame")

sample of loc_df

structure(list(Organization.Location.1 = c("zug switzerland", 
"zug canton of zug switzerland", "zimbabwe", "zigong chengdu pr china", 
"zhuhai guangdong china", "zaragoza spain"), Organization.Location.2 = c("", 
"san francisco bay area", "london canada area", "beijing city china", 
"greater atlanta area", "paris area france")), .Names = c("Organization.Location.1", 
"Organization.Location.2"), row.names = c(NA, 6L), class = "data.frame")

edited Mar 22 at 12:41

asked Mar 22 at 11:58

Girijesh Singh

0
down vote
favorite

I am trying to replace each city, state, or province name with its country name in the organization columns.

Please help me to optimize this code. I am very new to R.

for(k in 1:2)
 if(k==1)

 for (i in 1:nrow(city_df)) 
 x1 <- paste(" ", city_df$City[i], sep = "")
 x2 <- paste(" ", city_df$City[i], " ", sep = "")
 x3 <- paste(city_df$City[i], " ", sep = "")
 # print(x1)

 for (j in 1:nrow(loc_df)) 
 #print(loc_df$Organization.Location.1[j])


 if (grepl(x1, loc_df$Organization.Location.1[j]) 
 
 
 if(k==2)

 for (i in 1:nrow(city_df)) 
 x1 <- paste(" ", city_df$City[i], sep = "")
 x2 <- paste(" ", city_df$City[i], " ", sep = "")
 x3 <- paste(city_df$City[i], " ", sep = "")


 for (j in 1:nrow(loc_df)) 
 grepl(x3, loc_df$Organization.Location.3[j])) 
 loc_df$org_new1[j] <- city_df$Country[i]
 break

this is sample data I have generated using dput of city_df

structure(list(City = c("zug", "canton of zug", "zimbabwe", 
 "zigong chengdu", "zhuhai guangdong china", "zaragoza spain"), Country = c("switzerland", 
 "switzerland", "zimbabwe", "china", "china", "spain"
 )), .Names = c("City", "Country"), row.names = c(NA, 6L), class = "data.frame")

sample of loc_df

structure(list(Organization.Location.1 = c("zug switzerland", 
"zug canton of zug switzerland", "zimbabwe", "zigong chengdu pr china", 
"zhuhai guangdong china", "zaragoza spain"), Organization.Location.2 = c("", 
"san francisco bay area", "london canada area", "beijing city china", 
"greater atlanta area", "paris area france")), .Names = c("Organization.Location.1", 
"Organization.Location.2"), row.names = c(NA, 6L), class = "data.frame")

edited Mar 22 at 12:41

asked Mar 22 at 11:58

Girijesh Singh

edited Mar 22 at 12:41

asked Mar 22 at 11:58

Girijesh Singh

asked Mar 22 at 11:58

Girijesh Singh

asked Mar 22 at 11:58

Girijesh Singh

you have not supplied case in your data when there is a match
â€“Â minem
Mar 22 at 12:15

@minem loc_df$org_new1[j] <- city_df$Country[i] this line of code is supplying data when there is a match, And it present in above code too
â€“Â Girijesh Singh
Mar 22 at 12:32

You have not supplied in your example data a valid example when the conditiona are met
â€“Â minem
Mar 22 at 12:34

@minem sorry sir, I have updated the question now.
â€“Â Girijesh Singh
Mar 22 at 12:41

add a commentÂ |Â

you have not supplied case in your data when there is a match
â€“Â minem
Mar 22 at 12:15

@minem loc_df$org_new1[j] <- city_df$Country[i] this line of code is supplying data when there is a match, And it present in above code too
â€“Â Girijesh Singh
Mar 22 at 12:32

You have not supplied in your example data a valid example when the conditiona are met
â€“Â minem
Mar 22 at 12:34

@minem sorry sir, I have updated the question now.
â€“Â Girijesh Singh
Mar 22 at 12:41

you have not supplied case in your data when there is a match
â€“Â minem
Mar 22 at 12:15

@minem loc_df$org_new1[j] <- city_df$Country[i] this line of code is supplying data when there is a match, And it present in above code too
â€“Â Girijesh Singh
Mar 22 at 12:32

You have not supplied in your example data a valid example when the conditiona are met
â€“Â minem
Mar 22 at 12:34

@minem sorry sir, I have updated the question now.
â€“Â Girijesh Singh
Mar 22 at 12:41

add a commentÂ |Â

1 Answer
1

active

oldest

votes

up vote
0
down vote

You can try something like this:

# function for string preperation:
preperString <- function(x) 
 require(stringr)
 x <- str_to_lower(x)
 x <- str_trim(x)
 x


setDT(loc_df) # convert data.frames to data.table
setDT(city_df)

loc_df <- loc_df[, lapply(.SD, preperString)] # apply string preperation to all columns of loc_df
city_df[, City := preperString(City)]

loc_df <- merge(loc_df, city_df, by.x = 'Organization.Location.1',
 by.y = 'City', all.x = T, sort = F)
loc_df <- merge(loc_df, city_df, by.x = 'Organization.Location.2',
 by.y = 'City', all.x = T, sort = F)
loc_df
# Organization.Location.2 Organization.Location.1 Country.x Country.y
# 1: zug switzerland NA NA
# 2: san francisco bay area zug canton of zug switzerland NA NA
# 3: london canada area zimbabwe zimbabwe NA
# 4: beijing city china zigong chengdu pr china NA NA
# 5: greater atlanta area zhuhai guangdong china china NA
# 6: paris area france zaragoza spain spain NA

# and then you can write rule tu create org_new1, for example:
loc_df[, org_new1 := Country.x]
loc_df[is.na(org_new1), org_new1 := Country.y]
loc_df
# Organization.Location.2 Organization.Location.1 Country.x Country.y org_new1
# 1: zug switzerland NA NA NA
# 2: san francisco bay area zug canton of zug switzerland NA NA NA
# 3: london canada area zimbabwe zimbabwe NA zimbabwe
# 4: beijing city china zigong chengdu pr china NA NA NA
# 5: greater atlanta area zhuhai guangdong china china NA china
# 6: paris area france zaragoza spain spain NA spain

answered Mar 22 at 12:44

minem

232139

Thank you for your answer, but when I try to run your code I get the following error: Error in vecseq(f__, len__, if (allow.cartesian || notjoin || !anyDuplicated(f__, : Join results in 526312 rows; more than 47285 = nrow(x)+nrow(i). Check for duplicate key values in i each of which join to the same group in x over and over again. If that's ok, try by=.EACHI to run j for each group to avoid the large allocation. If you are sure you wish to proceed, rerun with allow.cartesian=TRUE.
– Girijesh Singh
Mar 22 at 13:28

@GirijeshSingh As I cannot see your data, it is hard to help you, but the error message is a good starting point. It states: "Check for duplicate key values". Maybe try doing that and removing the duplicates?
– minem
Mar 22 at 13:32

Here is a sample of the data: github.com/girijesh18/dataset
– Girijesh Singh
Mar 22 at 13:48

Please help me figure it out.
– Girijesh Singh
Mar 22 at 13:48

city_df <- city_df[City != '']; city_df <- unique(city_df)
– minem
Mar 22 at 13:58

Â |Â
show 3 more comments

Your Answer

StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);

);

draft saved

draft discarded

StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f190195%2fpattern-matching-replacement-and-for-loop-optimization-in-r%23new-answer', 'question_page');

);

Post as a guest

Name

1 Answer
1

active

oldest

votes

1 Answer
1

active

oldest

votes

up vote
0
down vote

You can try something like this:

# function for string preperation:
preperString <- function(x) 
 require(stringr)
 x <- str_to_lower(x)
 x <- str_trim(x)
 x


setDT(loc_df) # convert data.frames to data.table
setDT(city_df)

loc_df <- loc_df[, lapply(.SD, preperString)] # apply string preperation to all columns of loc_df
city_df[, City := preperString(City)]

loc_df <- merge(loc_df, city_df, by.x = 'Organization.Location.1',
 by.y = 'City', all.x = T, sort = F)
loc_df <- merge(loc_df, city_df, by.x = 'Organization.Location.2',
 by.y = 'City', all.x = T, sort = F)
loc_df
# Organization.Location.2 Organization.Location.1 Country.x Country.y
# 1: zug switzerland NA NA
# 2: san francisco bay area zug canton of zug switzerland NA NA
# 3: london canada area zimbabwe zimbabwe NA
# 4: beijing city china zigong chengdu pr china NA NA
# 5: greater atlanta area zhuhai guangdong china china NA
# 6: paris area france zaragoza spain spain NA

# and then you can write rule tu create org_new1, for example:
loc_df[, org_new1 := Country.x]
loc_df[is.na(org_new1), org_new1 := Country.y]
loc_df
# Organization.Location.2 Organization.Location.1 Country.x Country.y org_new1
# 1: zug switzerland NA NA NA
# 2: san francisco bay area zug canton of zug switzerland NA NA NA
# 3: london canada area zimbabwe zimbabwe NA zimbabwe
# 4: beijing city china zigong chengdu pr china NA NA NA
# 5: greater atlanta area zhuhai guangdong china china NA china
# 6: paris area france zaragoza spain spain NA spain

answered Mar 22 at 12:44

minem

232139

thank you for your answer but when I am trying to run your code I am getting output like this Error in vecseq(f__, len__, if (allow.cartesian || notjoin || !anyDuplicated(f__, : Join results in 526312 rows; more than 47285 = nrow(x)+nrow(i). Check for duplicate key values in i each of which join to the same group in x over and over again. If that's ok, try by=.EACHI to run j for each group to avoid the large allocation. If you are sure you wish to proceed, rerun with allow.cartesian=TRUE.
â€“Â Girijesh Singh
Mar 22 at 13:28

@GirijeshSingh As I do not see your data it is hard to help you, but the error message could be a start. There is stated: ''Check for duplicate key values'', maybe try to do that and remove the duplicates?
â€“Â minem
Mar 22 at 13:32

here is the sample of data github.com/girijesh18/dataset
â€“Â Girijesh Singh
Mar 22 at 13:48

please help me to figure it out
â€“Â Girijesh Singh
Mar 22 at 13:48

city_df <- city_df[City != '']; city_df <- unique(city_df)
â€“Â minem
Mar 22 at 13:58

Â |Â
show 3 more comments

up vote
0
down vote

You can try something like this:

# function for string preperation:
preperString <- function(x) 
 require(stringr)
 x <- str_to_lower(x)
 x <- str_trim(x)
 x


setDT(loc_df) # convert data.frames to data.table
setDT(city_df)

loc_df <- loc_df[, lapply(.SD, preperString)] # apply string preperation to all columns of loc_df
city_df[, City := preperString(City)]

loc_df <- merge(loc_df, city_df, by.x = 'Organization.Location.1',
 by.y = 'City', all.x = T, sort = F)
loc_df <- merge(loc_df, city_df, by.x = 'Organization.Location.2',
 by.y = 'City', all.x = T, sort = F)
loc_df
# Organization.Location.2 Organization.Location.1 Country.x Country.y
# 1: zug switzerland NA NA
# 2: san francisco bay area zug canton of zug switzerland NA NA
# 3: london canada area zimbabwe zimbabwe NA
# 4: beijing city china zigong chengdu pr china NA NA
# 5: greater atlanta area zhuhai guangdong china china NA
# 6: paris area france zaragoza spain spain NA

# and then you can write rule tu create org_new1, for example:
loc_df[, org_new1 := Country.x]
loc_df[is.na(org_new1), org_new1 := Country.y]
loc_df
# Organization.Location.2 Organization.Location.1 Country.x Country.y org_new1
# 1: zug switzerland NA NA NA
# 2: san francisco bay area zug canton of zug switzerland NA NA NA
# 3: london canada area zimbabwe zimbabwe NA zimbabwe
# 4: beijing city china zigong chengdu pr china NA NA NA
# 5: greater atlanta area zhuhai guangdong china china NA china
# 6: paris area france zaragoza spain spain NA spain

answered Mar 22 at 12:44

minem

232139

thank you for your answer but when I am trying to run your code I am getting output like this Error in vecseq(f__, len__, if (allow.cartesian || notjoin || !anyDuplicated(f__, : Join results in 526312 rows; more than 47285 = nrow(x)+nrow(i). Check for duplicate key values in i each of which join to the same group in x over and over again. If that's ok, try by=.EACHI to run j for each group to avoid the large allocation. If you are sure you wish to proceed, rerun with allow.cartesian=TRUE.
â€“Â Girijesh Singh
Mar 22 at 13:28

@GirijeshSingh As I do not see your data it is hard to help you, but the error message could be a start. There is stated: ''Check for duplicate key values'', maybe try to do that and remove the duplicates?
â€“Â minem
Mar 22 at 13:32

here is the sample of data github.com/girijesh18/dataset
â€“Â Girijesh Singh
Mar 22 at 13:48

please help me to figure it out
â€“Â Girijesh Singh
Mar 22 at 13:48

city_df <- city_df[City != '']; city_df <- unique(city_df)
â€“Â minem
Mar 22 at 13:58

Â |Â
show 3 more comments

up vote
0
down vote

You can try something like this:

# function for string preperation:
preperString <- function(x) 
 require(stringr)
 x <- str_to_lower(x)
 x <- str_trim(x)
 x


setDT(loc_df) # convert data.frames to data.table
setDT(city_df)

loc_df <- loc_df[, lapply(.SD, preperString)] # apply string preperation to all columns of loc_df
city_df[, City := preperString(City)]

loc_df <- merge(loc_df, city_df, by.x = 'Organization.Location.1',
 by.y = 'City', all.x = T, sort = F)
loc_df <- merge(loc_df, city_df, by.x = 'Organization.Location.2',
 by.y = 'City', all.x = T, sort = F)
loc_df
# Organization.Location.2 Organization.Location.1 Country.x Country.y
# 1: zug switzerland NA NA
# 2: san francisco bay area zug canton of zug switzerland NA NA
# 3: london canada area zimbabwe zimbabwe NA
# 4: beijing city china zigong chengdu pr china NA NA
# 5: greater atlanta area zhuhai guangdong china china NA
# 6: paris area france zaragoza spain spain NA

# and then you can write rule tu create org_new1, for example:
loc_df[, org_new1 := Country.x]
loc_df[is.na(org_new1), org_new1 := Country.y]
loc_df
# Organization.Location.2 Organization.Location.1 Country.x Country.y org_new1
# 1: zug switzerland NA NA NA
# 2: san francisco bay area zug canton of zug switzerland NA NA NA
# 3: london canada area zimbabwe zimbabwe NA zimbabwe
# 4: beijing city china zigong chengdu pr china NA NA NA
# 5: greater atlanta area zhuhai guangdong china china NA china
# 6: paris area france zaragoza spain spain NA spain

answered Mar 22 at 12:44

minem

232139

You can try something like this:

# function for string preperation:
preperString <- function(x) 
 require(stringr)
 x <- str_to_lower(x)
 x <- str_trim(x)
 x


setDT(loc_df) # convert data.frames to data.table
setDT(city_df)

loc_df <- loc_df[, lapply(.SD, preperString)] # apply string preperation to all columns of loc_df
city_df[, City := preperString(City)]

loc_df <- merge(loc_df, city_df, by.x = 'Organization.Location.1',
 by.y = 'City', all.x = T, sort = F)
loc_df <- merge(loc_df, city_df, by.x = 'Organization.Location.2',
 by.y = 'City', all.x = T, sort = F)
loc_df
# Organization.Location.2 Organization.Location.1 Country.x Country.y
# 1: zug switzerland NA NA
# 2: san francisco bay area zug canton of zug switzerland NA NA
# 3: london canada area zimbabwe zimbabwe NA
# 4: beijing city china zigong chengdu pr china NA NA
# 5: greater atlanta area zhuhai guangdong china china NA
# 6: paris area france zaragoza spain spain NA

# and then you can write rule tu create org_new1, for example:
loc_df[, org_new1 := Country.x]
loc_df[is.na(org_new1), org_new1 := Country.y]
loc_df
# Organization.Location.2 Organization.Location.1 Country.x Country.y org_new1
# 1: zug switzerland NA NA NA
# 2: san francisco bay area zug canton of zug switzerland NA NA NA
# 3: london canada area zimbabwe zimbabwe NA zimbabwe
# 4: beijing city china zigong chengdu pr china NA NA NA
# 5: greater atlanta area zhuhai guangdong china china NA china
# 6: paris area france zaragoza spain spain NA spain

answered Mar 22 at 12:44

minem

232139

answered Mar 22 at 12:44

minem

232139

answered Mar 22 at 12:44

minem

232139

answered Mar 22 at 12:44

minem

232139

thank you for your answer but when I am trying to run your code I am getting output like this Error in vecseq(f__, len__, if (allow.cartesian || notjoin || !anyDuplicated(f__, : Join results in 526312 rows; more than 47285 = nrow(x)+nrow(i). Check for duplicate key values in i each of which join to the same group in x over and over again. If that's ok, try by=.EACHI to run j for each group to avoid the large allocation. If you are sure you wish to proceed, rerun with allow.cartesian=TRUE.
â€“Â Girijesh Singh
Mar 22 at 13:28

@GirijeshSingh As I do not see your data it is hard to help you, but the error message could be a start. There is stated: ''Check for duplicate key values'', maybe try to do that and remove the duplicates?
â€“Â minem
Mar 22 at 13:32

here is the sample of data github.com/girijesh18/dataset
â€“Â Girijesh Singh
Mar 22 at 13:48

please help me to figure it out
â€“Â Girijesh Singh
Mar 22 at 13:48

city_df <- city_df[City != '']; city_df <- unique(city_df)
â€“Â minem
Mar 22 at 13:58

Â |Â
show 3 more comments

thank you for your answer but when I am trying to run your code I am getting output like this Error in vecseq(f__, len__, if (allow.cartesian || notjoin || !anyDuplicated(f__, : Join results in 526312 rows; more than 47285 = nrow(x)+nrow(i). Check for duplicate key values in i each of which join to the same group in x over and over again. If that's ok, try by=.EACHI to run j for each group to avoid the large allocation. If you are sure you wish to proceed, rerun with allow.cartesian=TRUE.
â€“Â Girijesh Singh
Mar 22 at 13:28

@GirijeshSingh As I do not see your data it is hard to help you, but the error message could be a start. There is stated: ''Check for duplicate key values'', maybe try to do that and remove the duplicates?
â€“Â minem
Mar 22 at 13:32

here is the sample of data github.com/girijesh18/dataset
â€“Â Girijesh Singh
Mar 22 at 13:48

please help me to figure it out
â€“Â Girijesh Singh
Mar 22 at 13:48

city_df <- city_df[City != '']; city_df <- unique(city_df)
â€“Â minem
Mar 22 at 13:58

thank you for your answer but when I am trying to run your code I am getting output like this Error in vecseq(f__, len__, if (allow.cartesian || notjoin || !anyDuplicated(f__, : Join results in 526312 rows; more than 47285 = nrow(x)+nrow(i). Check for duplicate key values in i each of which join to the same group in x over and over again. If that's ok, try by=.EACHI to run j for each group to avoid the large allocation. If you are sure you wish to proceed, rerun with allow.cartesian=TRUE.
â€“Â Girijesh Singh
Mar 22 at 13:28

@GirijeshSingh As I do not see your data it is hard to help you, but the error message could be a start. There is stated: ''Check for duplicate key values'', maybe try to do that and remove the duplicates?
â€“Â minem
Mar 22 at 13:32

here is the sample of data github.com/girijesh18/dataset
â€“Â Girijesh Singh
Mar 22 at 13:48

please help me to figure it out
â€“Â Girijesh Singh
Mar 22 at 13:48

city_df <- city_df[City != '']; city_df <- unique(city_df)
â€“Â minem
Mar 22 at 13:58

Â |Â
show 3 more comments

draft saved

draft discarded

draft saved

draft discarded

Post as a guest

Name

搜尋此網誌

trjhtr