Listing files whose names match a pattern with a date

.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;

up vote
2
down vote

favorite

My directory folderMarket has lots of files that share the same base name but are tagged with a date string at the end. The date tag can be formatted differently, e.g. "2018-07-25" or "25Jul18". My helper function builds a list of paths by matching each file name found in the directory against filename_list. Is there a better way to build filename_list than the brute-force approach used below?

from datetime import datetime


strToday = "2018-07-25"
files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatos_DATE.xml', 'tomates.DATE.csv']


def get_path_list(directory, base_filename_list, savedAsOf):
    """Return paths of the files in ``directory`` matching the dated templates.

    Each template in ``base_filename_list`` contains the literal token
    ``DATE``. The token is replaced with ``savedAsOf`` in two spellings: the
    string as given (e.g. ``2018-07-25``) and its ``%d%b%y`` rendering
    (e.g. ``25Jul18``). Every directory entry whose name equals one of the
    resulting names is returned, joined onto ``directory``.

    Args:
        directory: Directory to scan (not recursive).
        base_filename_list: Iterable of file-name templates containing ``DATE``.
        savedAsOf: Date string in ``YYYY-MM-DD`` format.

    Returns:
        A list of matching paths, in the order ``os.listdir`` yields them.

    Raises:
        ValueError: If ``savedAsOf`` does not parse as ``%Y-%m-%d``.
        OSError: If ``directory`` cannot be listed.
    """
    # Imported locally because the surrounding snippet only imports datetime.
    import os

    as_of = datetime.strptime(savedAsOf, '%Y-%m-%d')
    # NOTE: %b is locale-dependent; "Jul" assumes an English/C locale.
    date_tags = (savedAsOf, as_of.strftime('%d%b%y'))

    # A set gives constant-time membership tests while scanning the directory.
    wanted = {
        template.replace('DATE', tag)
        for template in base_filename_list
        for tag in date_tags
    }

    return [
        os.path.join(directory, name)
        for name in map(os.fsdecode, os.listdir(directory))
        if name in wanted
    ]

print (len(get_path_list(folderMarket, files_market, strToday)))

edited Jul 30 at 22:10

200_success

123k14143398

asked Jul 30 at 21:09

Vrun

267

add a comment |

up vote
2
down vote

favorite

from datetime import datetime


strToday = "2018-07-25"
files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatos_DATE.xml', 'tomates.DATE.csv']


def get_path_list(directory, base_filename_list, savedAsOf):
    """Return paths of the files in ``directory`` matching the dated templates.

    Each template in ``base_filename_list`` contains the literal token
    ``DATE``. The token is replaced with ``savedAsOf`` in two spellings: the
    string as given (e.g. ``2018-07-25``) and its ``%d%b%y`` rendering
    (e.g. ``25Jul18``). Every directory entry whose name equals one of the
    resulting names is returned, joined onto ``directory``.

    Args:
        directory: Directory to scan (not recursive).
        base_filename_list: Iterable of file-name templates containing ``DATE``.
        savedAsOf: Date string in ``YYYY-MM-DD`` format.

    Returns:
        A list of matching paths, in the order ``os.listdir`` yields them.

    Raises:
        ValueError: If ``savedAsOf`` does not parse as ``%Y-%m-%d``.
        OSError: If ``directory`` cannot be listed.
    """
    # Imported locally because the surrounding snippet only imports datetime.
    import os

    as_of = datetime.strptime(savedAsOf, '%Y-%m-%d')
    # NOTE: %b is locale-dependent; "Jul" assumes an English/C locale.
    date_tags = (savedAsOf, as_of.strftime('%d%b%y'))

    # A set gives constant-time membership tests while scanning the directory.
    wanted = {
        template.replace('DATE', tag)
        for template in base_filename_list
        for tag in date_tags
    }

    return [
        os.path.join(directory, name)
        for name in map(os.fsdecode, os.listdir(directory))
        if name in wanted
    ]

print (len(get_path_list(folderMarket, files_market, strToday)))

edited Jul 30 at 22:10

200_success

123k14143398

asked Jul 30 at 21:09

Vrun

267

add a comment |

up vote
2
down vote

favorite

from datetime import datetime


strToday = "2018-07-25"
files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatos_DATE.xml', 'tomates.DATE.csv']


def get_path_list(directory, base_filename_list, savedAsOf):
    """Return paths of the files in ``directory`` matching the dated templates.

    Each template in ``base_filename_list`` contains the literal token
    ``DATE``. The token is replaced with ``savedAsOf`` in two spellings: the
    string as given (e.g. ``2018-07-25``) and its ``%d%b%y`` rendering
    (e.g. ``25Jul18``). Every directory entry whose name equals one of the
    resulting names is returned, joined onto ``directory``.

    Args:
        directory: Directory to scan (not recursive).
        base_filename_list: Iterable of file-name templates containing ``DATE``.
        savedAsOf: Date string in ``YYYY-MM-DD`` format.

    Returns:
        A list of matching paths, in the order ``os.listdir`` yields them.

    Raises:
        ValueError: If ``savedAsOf`` does not parse as ``%Y-%m-%d``.
        OSError: If ``directory`` cannot be listed.
    """
    # Imported locally because the surrounding snippet only imports datetime.
    import os

    as_of = datetime.strptime(savedAsOf, '%Y-%m-%d')
    # NOTE: %b is locale-dependent; "Jul" assumes an English/C locale.
    date_tags = (savedAsOf, as_of.strftime('%d%b%y'))

    # A set gives constant-time membership tests while scanning the directory.
    wanted = {
        template.replace('DATE', tag)
        for template in base_filename_list
        for tag in date_tags
    }

    return [
        os.path.join(directory, name)
        for name in map(os.fsdecode, os.listdir(directory))
        if name in wanted
    ]

print (len(get_path_list(folderMarket, files_market, strToday)))

edited Jul 30 at 22:10

200_success

123k14143398

asked Jul 30 at 21:09

Vrun

267

from datetime import datetime


strToday = "2018-07-25"
files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatos_DATE.xml', 'tomates.DATE.csv']


def get_path_list(directory, base_filename_list, savedAsOf):
    """Return paths of the files in ``directory`` matching the dated templates.

    Each template in ``base_filename_list`` contains the literal token
    ``DATE``. The token is replaced with ``savedAsOf`` in two spellings: the
    string as given (e.g. ``2018-07-25``) and its ``%d%b%y`` rendering
    (e.g. ``25Jul18``). Every directory entry whose name equals one of the
    resulting names is returned, joined onto ``directory``.

    Args:
        directory: Directory to scan (not recursive).
        base_filename_list: Iterable of file-name templates containing ``DATE``.
        savedAsOf: Date string in ``YYYY-MM-DD`` format.

    Returns:
        A list of matching paths, in the order ``os.listdir`` yields them.

    Raises:
        ValueError: If ``savedAsOf`` does not parse as ``%Y-%m-%d``.
        OSError: If ``directory`` cannot be listed.
    """
    # Imported locally because the surrounding snippet only imports datetime.
    import os

    as_of = datetime.strptime(savedAsOf, '%Y-%m-%d')
    # NOTE: %b is locale-dependent; "Jul" assumes an English/C locale.
    date_tags = (savedAsOf, as_of.strftime('%d%b%y'))

    # A set gives constant-time membership tests while scanning the directory.
    wanted = {
        template.replace('DATE', tag)
        for template in base_filename_list
        for tag in date_tags
    }

    return [
        os.path.join(directory, name)
        for name in map(os.fsdecode, os.listdir(directory))
        if name in wanted
    ]

print (len(get_path_list(folderMarket, files_market, strToday)))

edited Jul 30 at 22:10

200_success

123k14143398

asked Jul 30 at 21:09

Vrun

267

edited Jul 30 at 22:10

200_success

123k14143398

edited Jul 30 at 22:10

200_success

123k14143398

edited Jul 30 at 22:10

200_success

123k14143398

asked Jul 30 at 21:09

Vrun

267

asked Jul 30 at 21:09

Vrun

267

asked Jul 30 at 21:09

Vrun

267

add a comment |

2 Answers
2

active

oldest

votes

up vote
1
down vote

Firstly some tips:

Your code uses a mixture of snake_case and camelCase; you should stick to one style. If you're going to write Python, PEP 8 states that snake_case is the expected style.

Your code lacks an if __name__ == "__main__": starting section. It's recommended to have this. As an example, if you don't have this, when you use tools like Sphinx to produce code documentation, it will actually run your code instead of just building the objects. Also, it's kind of expected :-)

As to the core questions you have, you can use pathlib's Path object to assist with the globbing of files and matching file parameters, as well as the string formatting format function to replace "placeholders" with a certain value. For instance:

from pathlib import Path


def get_path_list(directory, base_filename_list, saved_as_of):
    """Print every file in ``directory`` that matches a dated template.

    Each template in ``base_filename_list`` is a ``str.format`` pattern whose
    ``{}`` placeholder is filled with ``saved_as_of``; the formatted name is
    then used as a glob pattern relative to ``directory``.

    Args:
        directory: Directory to search.
        base_filename_list: Iterable of templates containing a ``{}`` placeholder.
        saved_as_of: Date string inserted into each template.

    Returns:
        None. The pattern searched for and each match are written to stdout.
    """
    for template in base_filename_list:
        file_date = template.format(saved_as_of)
        # The braces are required; without them the literal text is printed.
        print(f"looking for {file_date}")
        for file in Path(directory).glob(file_date):
            print(str(file))


if __name__ == "__main__":
    today = "2018-07-25"
    # Each template needs a {} placeholder so str.format can insert the date.
    files_market = [
        'apples_{}.xml',
        'peaches_{}.xml',
        'cucumbers_{}.xml',
        'potatoes_{}.xml',
        'tomatoes.{}.csv',
    ]
    get_path_list(".", files_market, today)

The code above will get you 90% of the way there, but I'll leave that as an exercise for you to complete.

Good Luck!

answered Jul 31 at 7:56

C. Harley

5415

add a comment |

up vote
0
down vote

Thank you for the pointers; here is all I could muster. I am not sure how to use the format function the way you intended. I think pathlib's glob only accepts a str, not a list, if I interpreted your answer correctly.

from pathlib import Path
from datetime import datetime


def get_path_list(directory, base_filename_list, saved_as_of):
    """Return the names of files in ``directory`` matching the dated templates.

    The ``DATE`` token of every template is substituted with ``saved_as_of``
    as given and with its ``%d%b%y`` rendering. Those candidate names are
    intersected with the names found by globbing ``directory`` once per date
    spelling.

    Args:
        directory: Directory to search (non-recursive glob).
        base_filename_list: File-name templates containing the token ``DATE``.
        saved_as_of: Date string in ``YYYY-MM-DD`` format.

    Returns:
        A list of bare file names (not full paths), in arbitrary order.
    """
    parsed = datetime.strptime(saved_as_of, '%Y-%m-%d')
    date_tags = [saved_as_of, parsed.strftime('%d%b%y')]

    # Every template rendered with every accepted date spelling.
    expected_names = set()
    for template in base_filename_list:
        for tag in date_tags:
            expected_names.add(template.replace("DATE", tag))

    # Names present on disk that contain a date spelling followed by a dot.
    names_on_disk = set()
    root = Path(directory)
    for tag in date_tags:
        for entry in root.glob('*' + tag + '.*'):
            names_on_disk.add(str(entry.name))

    return list(expected_names & names_on_disk)


if __name__ == "__main__":
    today = "2018-07-25"
    folder = "."
    files_market = [
        'apples_DATE.xml',
        'peaches_DATE.xml',
        'cucumbers_DATE.xml',
        'potatoes_DATE.xml',
        'tomatoes.DATE.csv',
    ]
    test = get_path_list(folder, files_market, today)
    print(*test)

this produces:

apples_2018-07-25.xml tomatos.25Jul18.csv cucumbers_2018-07-25.xml peaches_2018-07-25.xml potatos_2018-07-25.xml

answered Aug 1 at 19:25

Vrun

267

add a comment |

Your Answer

StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);

);

draft saved

draft discarded

StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f200620%2flisting-files-whose-names-match-a-pattern-with-a-date%23new-answer', 'question_page');

);

Post as a guest

Name

2 Answers
2

active

oldest

votes

2 Answers
2

active

oldest

votes

up vote
1
down vote

Firstly some tips:

Your code uses a mixture of snake_case and camelCase; you should stick to one style. If you're going to write Python, PEP 8 states that snake_case is the expected style.

Your code lacks an if __name__ == "__main__": starting section. It's recommended to have this. As an example, if you don't have this, when you use tools like Sphinx to produce code documentation, it will actually run your code instead of just building the objects. Also, it's kind of expected :-)

from pathlib import Path


def get_path_list(directory, base_filename_list, saved_as_of):
    """Print every file in ``directory`` that matches a dated template.

    Each template in ``base_filename_list`` is a ``str.format`` pattern whose
    ``{}`` placeholder is filled with ``saved_as_of``; the formatted name is
    then used as a glob pattern relative to ``directory``.

    Args:
        directory: Directory to search.
        base_filename_list: Iterable of templates containing a ``{}`` placeholder.
        saved_as_of: Date string inserted into each template.

    Returns:
        None. The pattern searched for and each match are written to stdout.
    """
    for template in base_filename_list:
        file_date = template.format(saved_as_of)
        # The braces are required; without them the literal text is printed.
        print(f"looking for {file_date}")
        for file in Path(directory).glob(file_date):
            print(str(file))


if __name__ == "__main__":
    today = "2018-07-25"
    # Each template needs a {} placeholder so str.format can insert the date.
    files_market = [
        'apples_{}.xml',
        'peaches_{}.xml',
        'cucumbers_{}.xml',
        'potatoes_{}.xml',
        'tomatoes.{}.csv',
    ]
    get_path_list(".", files_market, today)

The code above will get you 90% of the way there, but I'll leave that as an exercise for you to complete.

Good Luck!

answered Jul 31 at 7:56

C. Harley

5415

add a comment |

up vote
1
down vote

Firstly some tips:

Your code uses a mixture of snake_case and camelCase; you should stick to one style. If you're going to write Python, PEP 8 states that snake_case is the expected style.

Your code lacks an if __name__ == "__main__": starting section. It's recommended to have this. As an example, if you don't have this, when you use tools like Sphinx to produce code documentation, it will actually run your code instead of just building the objects. Also, it's kind of expected :-)

from pathlib import Path


def get_path_list(directory, base_filename_list, saved_as_of):
    """Print every file in ``directory`` that matches a dated template.

    Each template in ``base_filename_list`` is a ``str.format`` pattern whose
    ``{}`` placeholder is filled with ``saved_as_of``; the formatted name is
    then used as a glob pattern relative to ``directory``.

    Args:
        directory: Directory to search.
        base_filename_list: Iterable of templates containing a ``{}`` placeholder.
        saved_as_of: Date string inserted into each template.

    Returns:
        None. The pattern searched for and each match are written to stdout.
    """
    for template in base_filename_list:
        file_date = template.format(saved_as_of)
        # The braces are required; without them the literal text is printed.
        print(f"looking for {file_date}")
        for file in Path(directory).glob(file_date):
            print(str(file))


if __name__ == "__main__":
    today = "2018-07-25"
    # Each template needs a {} placeholder so str.format can insert the date.
    files_market = [
        'apples_{}.xml',
        'peaches_{}.xml',
        'cucumbers_{}.xml',
        'potatoes_{}.xml',
        'tomatoes.{}.csv',
    ]
    get_path_list(".", files_market, today)

The code above will get you 90% of the way there, but I'll leave that as an exercise for you to complete.

Good Luck!

answered Jul 31 at 7:56

C. Harley

5415

add a comment |

up vote
1
down vote

Firstly some tips:

Your code uses a mixture of snake_case and camelCase; you should stick to one style. If you're going to write Python, PEP 8 states that snake_case is the expected style.

Your code lacks an if __name__ == "__main__": starting section. It's recommended to have this. As an example, if you don't have this, when you use tools like Sphinx to produce code documentation, it will actually run your code instead of just building the objects. Also, it's kind of expected :-)

from pathlib import Path


def get_path_list(directory, base_filename_list, saved_as_of):
    """Print every file in ``directory`` that matches a dated template.

    Each template in ``base_filename_list`` is a ``str.format`` pattern whose
    ``{}`` placeholder is filled with ``saved_as_of``; the formatted name is
    then used as a glob pattern relative to ``directory``.

    Args:
        directory: Directory to search.
        base_filename_list: Iterable of templates containing a ``{}`` placeholder.
        saved_as_of: Date string inserted into each template.

    Returns:
        None. The pattern searched for and each match are written to stdout.
    """
    for template in base_filename_list:
        file_date = template.format(saved_as_of)
        # The braces are required; without them the literal text is printed.
        print(f"looking for {file_date}")
        for file in Path(directory).glob(file_date):
            print(str(file))


if __name__ == "__main__":
    today = "2018-07-25"
    # Each template needs a {} placeholder so str.format can insert the date.
    files_market = [
        'apples_{}.xml',
        'peaches_{}.xml',
        'cucumbers_{}.xml',
        'potatoes_{}.xml',
        'tomatoes.{}.csv',
    ]
    get_path_list(".", files_market, today)

The code above will get you 90% of the way there, but I'll leave that as an exercise for you to complete.

Good Luck!

answered Jul 31 at 7:56

C. Harley

5415

Firstly some tips:

Your code uses a mixture of snake_case and camelCase; you should stick to one style. If you're going to write Python, PEP 8 states that snake_case is the expected style.

Your code lacks an if __name__ == "__main__": starting section. It's recommended to have this. As an example, if you don't have this, when you use tools like Sphinx to produce code documentation, it will actually run your code instead of just building the objects. Also, it's kind of expected :-)

from pathlib import Path


def get_path_list(directory, base_filename_list, saved_as_of):
    """Print every file in ``directory`` that matches a dated template.

    Each template in ``base_filename_list`` is a ``str.format`` pattern whose
    ``{}`` placeholder is filled with ``saved_as_of``; the formatted name is
    then used as a glob pattern relative to ``directory``.

    Args:
        directory: Directory to search.
        base_filename_list: Iterable of templates containing a ``{}`` placeholder.
        saved_as_of: Date string inserted into each template.

    Returns:
        None. The pattern searched for and each match are written to stdout.
    """
    for template in base_filename_list:
        file_date = template.format(saved_as_of)
        # The braces are required; without them the literal text is printed.
        print(f"looking for {file_date}")
        for file in Path(directory).glob(file_date):
            print(str(file))


if __name__ == "__main__":
    today = "2018-07-25"
    # Each template needs a {} placeholder so str.format can insert the date.
    files_market = [
        'apples_{}.xml',
        'peaches_{}.xml',
        'cucumbers_{}.xml',
        'potatoes_{}.xml',
        'tomatoes.{}.csv',
    ]
    get_path_list(".", files_market, today)

The code above will get you 90% of the way there, but I'll leave that as an exercise for you to complete.

Good Luck!

answered Jul 31 at 7:56

C. Harley

5415

answered Jul 31 at 7:56

C. Harley

5415

answered Jul 31 at 7:56

C. Harley

5415

answered Jul 31 at 7:56

C. Harley

5415

add a comment |

up vote
0
down vote

from pathlib import Path
from datetime import datetime


def get_path_list(directory, base_filename_list, saved_as_of):
    """Return the names of files in ``directory`` matching the dated templates.

    The ``DATE`` token of every template is substituted with ``saved_as_of``
    as given and with its ``%d%b%y`` rendering. Those candidate names are
    intersected with the names found by globbing ``directory`` once per date
    spelling.

    Args:
        directory: Directory to search (non-recursive glob).
        base_filename_list: File-name templates containing the token ``DATE``.
        saved_as_of: Date string in ``YYYY-MM-DD`` format.

    Returns:
        A list of bare file names (not full paths), in arbitrary order.
    """
    parsed = datetime.strptime(saved_as_of, '%Y-%m-%d')
    date_tags = [saved_as_of, parsed.strftime('%d%b%y')]

    # Every template rendered with every accepted date spelling.
    expected_names = set()
    for template in base_filename_list:
        for tag in date_tags:
            expected_names.add(template.replace("DATE", tag))

    # Names present on disk that contain a date spelling followed by a dot.
    names_on_disk = set()
    root = Path(directory)
    for tag in date_tags:
        for entry in root.glob('*' + tag + '.*'):
            names_on_disk.add(str(entry.name))

    return list(expected_names & names_on_disk)


if __name__ == "__main__":
    today = "2018-07-25"
    folder = "."
    files_market = [
        'apples_DATE.xml',
        'peaches_DATE.xml',
        'cucumbers_DATE.xml',
        'potatoes_DATE.xml',
        'tomatoes.DATE.csv',
    ]
    test = get_path_list(folder, files_market, today)
    print(*test)

this produces:

apples_2018-07-25.xml tomatos.25Jul18.csv cucumbers_2018-07-25.xml peaches_2018-07-25.xml potatos_2018-07-25.xml

answered Aug 1 at 19:25

Vrun

267

add a comment |

up vote
0
down vote

from pathlib import Path
from datetime import datetime


def get_path_list(directory, base_filename_list, saved_as_of):
    """Return the names of files in ``directory`` matching the dated templates.

    The ``DATE`` token of every template is substituted with ``saved_as_of``
    as given and with its ``%d%b%y`` rendering. Those candidate names are
    intersected with the names found by globbing ``directory`` once per date
    spelling.

    Args:
        directory: Directory to search (non-recursive glob).
        base_filename_list: File-name templates containing the token ``DATE``.
        saved_as_of: Date string in ``YYYY-MM-DD`` format.

    Returns:
        A list of bare file names (not full paths), in arbitrary order.
    """
    parsed = datetime.strptime(saved_as_of, '%Y-%m-%d')
    date_tags = [saved_as_of, parsed.strftime('%d%b%y')]

    # Every template rendered with every accepted date spelling.
    expected_names = set()
    for template in base_filename_list:
        for tag in date_tags:
            expected_names.add(template.replace("DATE", tag))

    # Names present on disk that contain a date spelling followed by a dot.
    names_on_disk = set()
    root = Path(directory)
    for tag in date_tags:
        for entry in root.glob('*' + tag + '.*'):
            names_on_disk.add(str(entry.name))

    return list(expected_names & names_on_disk)


if __name__ == "__main__":
    today = "2018-07-25"
    folder = "."
    files_market = [
        'apples_DATE.xml',
        'peaches_DATE.xml',
        'cucumbers_DATE.xml',
        'potatoes_DATE.xml',
        'tomatoes.DATE.csv',
    ]
    test = get_path_list(folder, files_market, today)
    print(*test)

this produces:

apples_2018-07-25.xml tomatos.25Jul18.csv cucumbers_2018-07-25.xml peaches_2018-07-25.xml potatos_2018-07-25.xml

answered Aug 1 at 19:25

Vrun

267

add a comment |

up vote
0
down vote

from pathlib import Path
from datetime import datetime


def get_path_list(directory, base_filename_list, saved_as_of):
    """Return the names of files in ``directory`` matching the dated templates.

    The ``DATE`` token of every template is substituted with ``saved_as_of``
    as given and with its ``%d%b%y`` rendering. Those candidate names are
    intersected with the names found by globbing ``directory`` once per date
    spelling.

    Args:
        directory: Directory to search (non-recursive glob).
        base_filename_list: File-name templates containing the token ``DATE``.
        saved_as_of: Date string in ``YYYY-MM-DD`` format.

    Returns:
        A list of bare file names (not full paths), in arbitrary order.
    """
    parsed = datetime.strptime(saved_as_of, '%Y-%m-%d')
    date_tags = [saved_as_of, parsed.strftime('%d%b%y')]

    # Every template rendered with every accepted date spelling.
    expected_names = set()
    for template in base_filename_list:
        for tag in date_tags:
            expected_names.add(template.replace("DATE", tag))

    # Names present on disk that contain a date spelling followed by a dot.
    names_on_disk = set()
    root = Path(directory)
    for tag in date_tags:
        for entry in root.glob('*' + tag + '.*'):
            names_on_disk.add(str(entry.name))

    return list(expected_names & names_on_disk)


if __name__ == "__main__":
    today = "2018-07-25"
    folder = "."
    files_market = [
        'apples_DATE.xml',
        'peaches_DATE.xml',
        'cucumbers_DATE.xml',
        'potatoes_DATE.xml',
        'tomatoes.DATE.csv',
    ]
    test = get_path_list(folder, files_market, today)
    print(*test)

this produces:

apples_2018-07-25.xml tomatos.25Jul18.csv cucumbers_2018-07-25.xml peaches_2018-07-25.xml potatos_2018-07-25.xml

answered Aug 1 at 19:25

Vrun

267

from pathlib import Path
from datetime import datetime


def get_path_list(directory, base_filename_list, saved_as_of):
    """Return the names of files in ``directory`` matching the dated templates.

    The ``DATE`` token of every template is substituted with ``saved_as_of``
    as given and with its ``%d%b%y`` rendering. Those candidate names are
    intersected with the names found by globbing ``directory`` once per date
    spelling.

    Args:
        directory: Directory to search (non-recursive glob).
        base_filename_list: File-name templates containing the token ``DATE``.
        saved_as_of: Date string in ``YYYY-MM-DD`` format.

    Returns:
        A list of bare file names (not full paths), in arbitrary order.
    """
    parsed = datetime.strptime(saved_as_of, '%Y-%m-%d')
    date_tags = [saved_as_of, parsed.strftime('%d%b%y')]

    # Every template rendered with every accepted date spelling.
    expected_names = set()
    for template in base_filename_list:
        for tag in date_tags:
            expected_names.add(template.replace("DATE", tag))

    # Names present on disk that contain a date spelling followed by a dot.
    names_on_disk = set()
    root = Path(directory)
    for tag in date_tags:
        for entry in root.glob('*' + tag + '.*'):
            names_on_disk.add(str(entry.name))

    return list(expected_names & names_on_disk)


if __name__ == "__main__":
    today = "2018-07-25"
    folder = "."
    files_market = [
        'apples_DATE.xml',
        'peaches_DATE.xml',
        'cucumbers_DATE.xml',
        'potatoes_DATE.xml',
        'tomatoes.DATE.csv',
    ]
    test = get_path_list(folder, files_market, today)
    print(*test)

this produces:

apples_2018-07-25.xml tomatos.25Jul18.csv cucumbers_2018-07-25.xml peaches_2018-07-25.xml potatos_2018-07-25.xml

answered Aug 1 at 19:25

Vrun

267

answered Aug 1 at 19:25

Vrun

267

answered Aug 1 at 19:25

Vrun

267

answered Aug 1 at 19:25

Vrun

267

add a comment |

draft saved

draft discarded

draft saved

draft discarded

Post as a guest

Name

搜尋此網誌

trjhtr