Listing files whose names match a pattern with a date

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty { margin-bottom:0; }







up vote
2
down vote

favorite












My directory folderMarket contains many files that share a base name but are tagged with a date string at the end. The date tag can be formatted differently, e.g. "2018-07-25" or "25Jul18". My helper function builds a list of paths by checking each file name found in the directory against filename_list. Is there a better way to build filename_list than the brute-force approach used below?



from datetime import datetime


strToday = "2018-07-25"
files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatos_DATE.xml', 'tomates.DATE.csv']


def get_path_list(directory, base_filename_list, savedAsOf):
    """Return paths of files in ``directory`` that match dated templates.

    ``DATE`` in each template is replaced by ``savedAsOf`` both as given
    (``2018-07-25``) and reformatted as ``%d%b%y`` (``25Jul18``; the month
    abbreviation follows the current locale).

    Args:
        directory: Directory to scan (not recursive).
        base_filename_list: File-name templates containing ``DATE``.
        savedAsOf: Date string in ``%Y-%m-%d`` format.

    Returns:
        List of ``os.path.join(directory, name)`` for each directory entry
        whose name equals an expanded template, in ``os.listdir`` order.

    Raises:
        ValueError: If ``savedAsOf`` is not a ``%Y-%m-%d`` date.
    """
    import os  # local import: the snippet never imports os at file level

    date_tags = (
        savedAsOf,
        datetime.strptime(savedAsOf, '%Y-%m-%d').strftime('%d%b%y'),
    )
    # A set keeps the per-entry membership test O(1).
    wanted = {
        template.replace('DATE', tag)
        for template in base_filename_list
        for tag in date_tags
    }
    return [
        os.path.join(directory, name)
        for name in map(os.fsdecode, os.listdir(directory))
        if name in wanted
    ]

print (len(get_path_list(folderMarket, files_market, strToday)))






share|improve this question



























    up vote
    2
    down vote

    favorite












    My directory folderMarket has lots of files with the same name but are tagged with a date string at the end. The date tag can be formatted differently, e.g. "2018-07-25" or "25Jul18". My helper function is tasked with extracting a path list matching each found file name against filename_list. is there a better way to build a filename_list instead of brute force used below?



    from datetime import datetime


    strToday = "2018-07-25"
    files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatos_DATE.xml', 'tomates.DATE.csv']


    def get_path_list(directory, base_filename_list, savedAsOf):
        """Return paths of files in ``directory`` that match dated templates.

        ``DATE`` in each template is replaced by ``savedAsOf`` both as given
        (``2018-07-25``) and reformatted as ``%d%b%y`` (``25Jul18``; the month
        abbreviation follows the current locale).

        Args:
            directory: Directory to scan (not recursive).
            base_filename_list: File-name templates containing ``DATE``.
            savedAsOf: Date string in ``%Y-%m-%d`` format.

        Returns:
            List of ``os.path.join(directory, name)`` for each directory entry
            whose name equals an expanded template, in ``os.listdir`` order.

        Raises:
            ValueError: If ``savedAsOf`` is not a ``%Y-%m-%d`` date.
        """
        import os  # local import: the snippet never imports os at file level

        date_tags = (
            savedAsOf,
            datetime.strptime(savedAsOf, '%Y-%m-%d').strftime('%d%b%y'),
        )
        # A set keeps the per-entry membership test O(1).
        wanted = {
            template.replace('DATE', tag)
            for template in base_filename_list
            for tag in date_tags
        }
        return [
            os.path.join(directory, name)
            for name in map(os.fsdecode, os.listdir(directory))
            if name in wanted
        ]

    print (len(get_path_list(folderMarket, files_market, strToday)))






    share|improve this question























      up vote
      2
      down vote

      favorite









      up vote
      2
      down vote

      favorite











      My directory folderMarket has lots of files with the same name but are tagged with a date string at the end. The date tag can be formatted differently, e.g. "2018-07-25" or "25Jul18". My helper function is tasked with extracting a path list matching each found file name against filename_list. is there a better way to build a filename_list instead of brute force used below?



      from datetime import datetime


      strToday = "2018-07-25"
      files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatos_DATE.xml', 'tomates.DATE.csv']


      def get_path_list(directory, base_filename_list, savedAsOf):
          """Return paths of files in ``directory`` that match dated templates.

          ``DATE`` in each template is replaced by ``savedAsOf`` both as given
          (``2018-07-25``) and reformatted as ``%d%b%y`` (``25Jul18``; the month
          abbreviation follows the current locale).

          Args:
              directory: Directory to scan (not recursive).
              base_filename_list: File-name templates containing ``DATE``.
              savedAsOf: Date string in ``%Y-%m-%d`` format.

          Returns:
              List of ``os.path.join(directory, name)`` for each directory entry
              whose name equals an expanded template, in ``os.listdir`` order.

          Raises:
              ValueError: If ``savedAsOf`` is not a ``%Y-%m-%d`` date.
          """
          import os  # local import: the snippet never imports os at file level

          date_tags = (
              savedAsOf,
              datetime.strptime(savedAsOf, '%Y-%m-%d').strftime('%d%b%y'),
          )
          # A set keeps the per-entry membership test O(1).
          wanted = {
              template.replace('DATE', tag)
              for template in base_filename_list
              for tag in date_tags
          }
          return [
              os.path.join(directory, name)
              for name in map(os.fsdecode, os.listdir(directory))
              if name in wanted
          ]

      print (len(get_path_list(folderMarket, files_market, strToday)))






      share|improve this question













      My directory folderMarket has lots of files with the same name but are tagged with a date string at the end. The date tag can be formatted differently, e.g. "2018-07-25" or "25Jul18". My helper function is tasked with extracting a path list matching each found file name against filename_list. is there a better way to build a filename_list instead of brute force used below?



      from datetime import datetime


      strToday = "2018-07-25"
      files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatos_DATE.xml', 'tomates.DATE.csv']


      def get_path_list(directory, base_filename_list, savedAsOf):
          """Return paths of files in ``directory`` that match dated templates.

          ``DATE`` in each template is replaced by ``savedAsOf`` both as given
          (``2018-07-25``) and reformatted as ``%d%b%y`` (``25Jul18``; the month
          abbreviation follows the current locale).

          Args:
              directory: Directory to scan (not recursive).
              base_filename_list: File-name templates containing ``DATE``.
              savedAsOf: Date string in ``%Y-%m-%d`` format.

          Returns:
              List of ``os.path.join(directory, name)`` for each directory entry
              whose name equals an expanded template, in ``os.listdir`` order.

          Raises:
              ValueError: If ``savedAsOf`` is not a ``%Y-%m-%d`` date.
          """
          import os  # local import: the snippet never imports os at file level

          date_tags = (
              savedAsOf,
              datetime.strptime(savedAsOf, '%Y-%m-%d').strftime('%d%b%y'),
          )
          # A set keeps the per-entry membership test O(1).
          wanted = {
              template.replace('DATE', tag)
              for template in base_filename_list
              for tag in date_tags
          }
          return [
              os.path.join(directory, name)
              for name in map(os.fsdecode, os.listdir(directory))
              if name in wanted
          ]

      print (len(get_path_list(folderMarket, files_market, strToday)))








      share|improve this question












      share|improve this question




      share|improve this question








      edited Jul 30 at 22:10









      200_success

      123k14143398




      123k14143398









      asked Jul 30 at 21:09









      Vrun

      267




      267




















          2 Answers
          2






          active

          oldest

          votes

















          up vote
          1
          down vote













          Firstly some tips:



          • Your code uses a mixture of snake_case and camelCase, you should stick to a specific style. If you're going to write python, PEP8 states snake_case should be used as the expected style.

          • Your code lacks an if __name__ == "__main__": guard. It is recommended to have one: without it, tools such as Sphinx that import your module to build documentation will actually run your code instead of just building the objects. It is also the conventional entry point readers expect :-)

          As to the core questions you have, you can use pathlib's Path object to assist with the globbing of files and matching file parameters, as well as the string formatting format function to replace "placeholders" with a certain value. For instance:



          from pathlib import Path


          def get_path_list(directory, base_filename_list, saved_as_of):
              """Print and return the files in ``directory`` matching dated templates.

              Args:
                  directory: Directory to search (not recursive).
                  base_filename_list: File-name templates with a ``{}`` placeholder
                      where the date goes, e.g. ``'apples_{}.xml'``.
                  saved_as_of: Date string substituted into each template.

              Returns:
                  The matching paths as strings, grouped in template order.
              """
              matches = []
              for template in base_filename_list:
                  file_date = template.format(saved_as_of)
                  # Report the concrete name being searched for.
                  print(f"looking for {file_date}")
                  # Sorted so the result is deterministic if a template globs widely.
                  for file in sorted(Path(directory).glob(file_date)):
                      print(str(file))
                      matches.append(str(file))
              return matches


          if __name__ == "__main__":
              today = "2018-07-25"
              # Each template needs a ``{}`` for ``str.format`` to insert the date.
              files_market = ['apples_{}.xml', 'peaches_{}.xml', 'cucumbers_{}.xml', 'potatoes_{}.xml', 'tomatoes.{}.csv']
              get_path_list(".", files_market, today)


          The code above will get you 90% of the way there, but I'll leave that as an exercise for you to complete.



          Good Luck!






          share|improve this answer




























            up vote
            0
            down vote













            Thank you for the pointers; here is all I could muster. I am not sure how to use the format function the way you intended. If I interpreted your answer correctly, pathlib's glob only accepts a str, not a list.



            from pathlib import Path
            from datetime import datetime


            def get_path_list(directory, base_filename_list, saved_as_of):
                """Return the names of files in ``directory`` matching dated templates.

                ``DATE`` in each template is replaced by ``saved_as_of`` in every
                supported spelling: as given (``2018-07-25``) and as ``%d%b%y``
                (``25Jul18``).

                Args:
                    directory: Directory to search (not recursive).
                    base_filename_list: File-name templates containing ``DATE``.
                    saved_as_of: Date string in ``%Y-%m-%d`` format.

                Returns:
                    Sorted list of matching file names (names only, no directory).

                Raises:
                    ValueError: If ``saved_as_of`` is not a ``%Y-%m-%d`` date.
                """
                date = datetime.strptime(saved_as_of, '%Y-%m-%d')
                # Every spelling of the date that may appear in a file name.
                formatted_dates = (saved_as_of, date.strftime('%d%b%y'))
                expected_names = {
                    template.replace("DATE", tag)
                    for template in base_filename_list
                    for tag in formatted_dates
                }
                # Compare entries with the expected names directly: one directory
                # scan, and no template is lost to a narrower glob pattern.
                return sorted(
                    entry.name
                    for entry in Path(directory).iterdir()
                    if entry.name in expected_names
                )


            if __name__ == "__main__":
                today = "2018-07-25"
                folder = "."
                files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatoes_DATE.xml', 'tomatoes.DATE.csv']
                test = get_path_list(folder, files_market, today)
                print(*test)


            this produces:



            apples_2018-07-25.xml tomatos.25Jul18.csv cucumbers_2018-07-25.xml peaches_2018-07-25.xml potatos_2018-07-25.xml





            share|improve this answer





















              Your Answer




              StackExchange.ifUsing("editor", function ()
              return StackExchange.using("mathjaxEditing", function ()
              StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
              StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
              );
              );
              , "mathjax-editing");

              StackExchange.ifUsing("editor", function ()
              StackExchange.using("externalEditor", function ()
              StackExchange.using("snippets", function ()
              StackExchange.snippets.init();
              );
              );
              , "code-snippets");

              StackExchange.ready(function()
              var channelOptions =
              tags: "".split(" "),
              id: "196"
              ;
              initTagRenderer("".split(" "), "".split(" "), channelOptions);

              StackExchange.using("externalEditor", function()
              // Have to fire editor after snippets, if snippets enabled
              if (StackExchange.settings.snippets.snippetsEnabled)
              StackExchange.using("snippets", function()
              createEditor();
              );

              else
              createEditor();

              );

              function createEditor()
              StackExchange.prepareEditor(
              heartbeatType: 'answer',
              convertImagesToLinks: false,
              noModals: false,
              showLowRepImageUploadWarning: true,
              reputationToPostImages: null,
              bindNavPrevention: true,
              postfix: "",
              onDemand: true,
              discardSelector: ".discard-answer"
              ,immediatelyShowMarkdownHelp:true
              );



              );








               

              draft saved


              draft discarded


















              StackExchange.ready(
              function ()
              StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f200620%2flisting-files-whose-names-match-a-pattern-with-a-date%23new-answer', 'question_page');

              );

              Post as a guest






























              2 Answers
              2






              active

              oldest

              votes








              2 Answers
              2






              active

              oldest

              votes









              active

              oldest

              votes






              active

              oldest

              votes








              up vote
              1
              down vote













              Firstly some tips:



              • Your code uses a mixture of snake_case and camelCase, you should stick to a specific style. If you're going to write python, PEP8 states snake_case should be used as the expected style.

              • Your code lacks a if __name__ == "__main__": starting section. It's recommended to have this. As an example, if you don't have this, when you use tools like Sphinx to produce code documentation, it will actually run your code instead of just building the objects. Also, it's kind of expected :-)

              As to the core questions you have, you can use pathlib's Path object to assist with the globbing of files and matching file parameters, as well as the string formatting format function to replace "placeholders" with a certain value. For instance:



              from pathlib import Path


              def get_path_list(directory, base_filename_list, saved_as_of):
                  """Print and return the files in ``directory`` matching dated templates.

                  Args:
                      directory: Directory to search (not recursive).
                      base_filename_list: File-name templates with a ``{}`` placeholder
                          where the date goes, e.g. ``'apples_{}.xml'``.
                      saved_as_of: Date string substituted into each template.

                  Returns:
                      The matching paths as strings, grouped in template order.
                  """
                  matches = []
                  for template in base_filename_list:
                      file_date = template.format(saved_as_of)
                      # Report the concrete name being searched for.
                      print(f"looking for {file_date}")
                      # Sorted so the result is deterministic if a template globs widely.
                      for file in sorted(Path(directory).glob(file_date)):
                          print(str(file))
                          matches.append(str(file))
                  return matches


              if __name__ == "__main__":
                  today = "2018-07-25"
                  # Each template needs a ``{}`` for ``str.format`` to insert the date.
                  files_market = ['apples_{}.xml', 'peaches_{}.xml', 'cucumbers_{}.xml', 'potatoes_{}.xml', 'tomatoes.{}.csv']
                  get_path_list(".", files_market, today)


              The code above will get you 90% of the way there, but I'll leave that as an exercise for you to complete.



              Good Luck!






              share|improve this answer

























                up vote
                1
                down vote













                Firstly some tips:



                • Your code uses a mixture of snake_case and camelCase, you should stick to a specific style. If you're going to write python, PEP8 states snake_case should be used as the expected style.

                • Your code lacks a if __name__ == "__main__": starting section. It's recommended to have this. As an example, if you don't have this, when you use tools like Sphinx to produce code documentation, it will actually run your code instead of just building the objects. Also, it's kind of expected :-)

                As to the core questions you have, you can use pathlib's Path object to assist with the globbing of files and matching file parameters, as well as the string formatting format function to replace "placeholders" with a certain value. For instance:



                from pathlib import Path


                def get_path_list(directory, base_filename_list, saved_as_of):
                    """Print and return the files in ``directory`` matching dated templates.

                    Args:
                        directory: Directory to search (not recursive).
                        base_filename_list: File-name templates with a ``{}`` placeholder
                            where the date goes, e.g. ``'apples_{}.xml'``.
                        saved_as_of: Date string substituted into each template.

                    Returns:
                        The matching paths as strings, grouped in template order.
                    """
                    matches = []
                    for template in base_filename_list:
                        file_date = template.format(saved_as_of)
                        # Report the concrete name being searched for.
                        print(f"looking for {file_date}")
                        # Sorted so the result is deterministic if a template globs widely.
                        for file in sorted(Path(directory).glob(file_date)):
                            print(str(file))
                            matches.append(str(file))
                    return matches


                if __name__ == "__main__":
                    today = "2018-07-25"
                    # Each template needs a ``{}`` for ``str.format`` to insert the date.
                    files_market = ['apples_{}.xml', 'peaches_{}.xml', 'cucumbers_{}.xml', 'potatoes_{}.xml', 'tomatoes.{}.csv']
                    get_path_list(".", files_market, today)


                The code above will get you 90% of the way there, but I'll leave that as an exercise for you to complete.



                Good Luck!






                share|improve this answer























                  up vote
                  1
                  down vote










                  up vote
                  1
                  down vote









                  Firstly some tips:



                  • Your code uses a mixture of snake_case and camelCase, you should stick to a specific style. If you're going to write python, PEP8 states snake_case should be used as the expected style.

                  • Your code lacks a if __name__ == "__main__": starting section. It's recommended to have this. As an example, if you don't have this, when you use tools like Sphinx to produce code documentation, it will actually run your code instead of just building the objects. Also, it's kind of expected :-)

                  As to the core questions you have, you can use pathlib's Path object to assist with the globbing of files and matching file parameters, as well as the string formatting format function to replace "placeholders" with a certain value. For instance:



                  from pathlib import Path


                  def get_path_list(directory, base_filename_list, saved_as_of):
                      """Print and return the files in ``directory`` matching dated templates.

                      Args:
                          directory: Directory to search (not recursive).
                          base_filename_list: File-name templates with a ``{}`` placeholder
                              where the date goes, e.g. ``'apples_{}.xml'``.
                          saved_as_of: Date string substituted into each template.

                      Returns:
                          The matching paths as strings, grouped in template order.
                      """
                      matches = []
                      for template in base_filename_list:
                          file_date = template.format(saved_as_of)
                          # Report the concrete name being searched for.
                          print(f"looking for {file_date}")
                          # Sorted so the result is deterministic if a template globs widely.
                          for file in sorted(Path(directory).glob(file_date)):
                              print(str(file))
                              matches.append(str(file))
                      return matches


                  if __name__ == "__main__":
                      today = "2018-07-25"
                      # Each template needs a ``{}`` for ``str.format`` to insert the date.
                      files_market = ['apples_{}.xml', 'peaches_{}.xml', 'cucumbers_{}.xml', 'potatoes_{}.xml', 'tomatoes.{}.csv']
                      get_path_list(".", files_market, today)


                  The code above will get you 90% of the way there, but I'll leave that as an exercise for you to complete.



                  Good Luck!






                  share|improve this answer













                  Firstly some tips:



                  • Your code uses a mixture of snake_case and camelCase, you should stick to a specific style. If you're going to write python, PEP8 states snake_case should be used as the expected style.

                  • Your code lacks a if __name__ == "__main__": starting section. It's recommended to have this. As an example, if you don't have this, when you use tools like Sphinx to produce code documentation, it will actually run your code instead of just building the objects. Also, it's kind of expected :-)

                  As to the core questions you have, you can use pathlib's Path object to assist with the globbing of files and matching file parameters, as well as the string formatting format function to replace "placeholders" with a certain value. For instance:



                  from pathlib import Path


                  def get_path_list(directory, base_filename_list, saved_as_of):
                      """Print and return the files in ``directory`` matching dated templates.

                      Args:
                          directory: Directory to search (not recursive).
                          base_filename_list: File-name templates with a ``{}`` placeholder
                              where the date goes, e.g. ``'apples_{}.xml'``.
                          saved_as_of: Date string substituted into each template.

                      Returns:
                          The matching paths as strings, grouped in template order.
                      """
                      matches = []
                      for template in base_filename_list:
                          file_date = template.format(saved_as_of)
                          # Report the concrete name being searched for.
                          print(f"looking for {file_date}")
                          # Sorted so the result is deterministic if a template globs widely.
                          for file in sorted(Path(directory).glob(file_date)):
                              print(str(file))
                              matches.append(str(file))
                      return matches


                  if __name__ == "__main__":
                      today = "2018-07-25"
                      # Each template needs a ``{}`` for ``str.format`` to insert the date.
                      files_market = ['apples_{}.xml', 'peaches_{}.xml', 'cucumbers_{}.xml', 'potatoes_{}.xml', 'tomatoes.{}.csv']
                      get_path_list(".", files_market, today)


                  The code above will get you 90% of the way there, but I'll leave that as an exercise for you to complete.



                  Good Luck!







                  share|improve this answer













                  share|improve this answer



                  share|improve this answer











                  answered Jul 31 at 7:56









                  C. Harley

                  5415




                  5415






















                      up vote
                      0
                      down vote













                      Thank you for the pointers, here is all I could master. not sure how to use the format function the way you intended. I think pathlib glob only accepts str, not a list if i interpreted your answer correctly.



                      from pathlib import Path
                      from datetime import datetime


                      def get_path_list(directory, base_filename_list, saved_as_of):
                          """Return the names of files in ``directory`` matching dated templates.

                          ``DATE`` in each template is replaced by ``saved_as_of`` in every
                          supported spelling: as given (``2018-07-25``) and as ``%d%b%y``
                          (``25Jul18``).

                          Args:
                              directory: Directory to search (not recursive).
                              base_filename_list: File-name templates containing ``DATE``.
                              saved_as_of: Date string in ``%Y-%m-%d`` format.

                          Returns:
                              Sorted list of matching file names (names only, no directory).

                          Raises:
                              ValueError: If ``saved_as_of`` is not a ``%Y-%m-%d`` date.
                          """
                          date = datetime.strptime(saved_as_of, '%Y-%m-%d')
                          # Every spelling of the date that may appear in a file name.
                          formatted_dates = (saved_as_of, date.strftime('%d%b%y'))
                          expected_names = {
                              template.replace("DATE", tag)
                              for template in base_filename_list
                              for tag in formatted_dates
                          }
                          # Compare entries with the expected names directly: one directory
                          # scan, and no template is lost to a narrower glob pattern.
                          return sorted(
                              entry.name
                              for entry in Path(directory).iterdir()
                              if entry.name in expected_names
                          )


                      if __name__ == "__main__":
                          today = "2018-07-25"
                          folder = "."
                          files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatoes_DATE.xml', 'tomatoes.DATE.csv']
                          test = get_path_list(folder, files_market, today)
                          print(*test)


                      this produces:



                      apples_2018-07-25.xml tomatos.25Jul18.csv cucumbers_2018-07-25.xml peaches_2018-07-25.xml potatos_2018-07-25.xml





                      share|improve this answer

























                        up vote
                        0
                        down vote













                        Thank you for the pointers, here is all I could master. not sure how to use the format function the way you intended. I think pathlib glob only accepts str, not a list if i interpreted your answer correctly.



                        from pathlib import Path
                        from datetime import datetime


                        def get_path_list(directory, base_filename_list, saved_as_of):
                            """Return the names of files in ``directory`` matching dated templates.

                            ``DATE`` in each template is replaced by ``saved_as_of`` in every
                            supported spelling: as given (``2018-07-25``) and as ``%d%b%y``
                            (``25Jul18``).

                            Args:
                                directory: Directory to search (not recursive).
                                base_filename_list: File-name templates containing ``DATE``.
                                saved_as_of: Date string in ``%Y-%m-%d`` format.

                            Returns:
                                Sorted list of matching file names (names only, no directory).

                            Raises:
                                ValueError: If ``saved_as_of`` is not a ``%Y-%m-%d`` date.
                            """
                            date = datetime.strptime(saved_as_of, '%Y-%m-%d')
                            # Every spelling of the date that may appear in a file name.
                            formatted_dates = (saved_as_of, date.strftime('%d%b%y'))
                            expected_names = {
                                template.replace("DATE", tag)
                                for template in base_filename_list
                                for tag in formatted_dates
                            }
                            # Compare entries with the expected names directly: one directory
                            # scan, and no template is lost to a narrower glob pattern.
                            return sorted(
                                entry.name
                                for entry in Path(directory).iterdir()
                                if entry.name in expected_names
                            )


                        if __name__ == "__main__":
                            today = "2018-07-25"
                            folder = "."
                            files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatoes_DATE.xml', 'tomatoes.DATE.csv']
                            test = get_path_list(folder, files_market, today)
                            print(*test)


                        this produces:



                        apples_2018-07-25.xml tomatos.25Jul18.csv cucumbers_2018-07-25.xml peaches_2018-07-25.xml potatos_2018-07-25.xml





                        share|improve this answer























                          up vote
                          0
                          down vote










                          up vote
                          0
                          down vote









                          Thank you for the pointers, here is all I could master. not sure how to use the format function the way you intended. I think pathlib glob only accepts str, not a list if i interpreted your answer correctly.



                          from pathlib import Path
                          from datetime import datetime


                          def get_path_list(directory, base_filename_list, saved_as_of):
                              """Return the names of files in ``directory`` matching dated templates.

                              ``DATE`` in each template is replaced by ``saved_as_of`` in every
                              supported spelling: as given (``2018-07-25``) and as ``%d%b%y``
                              (``25Jul18``).

                              Args:
                                  directory: Directory to search (not recursive).
                                  base_filename_list: File-name templates containing ``DATE``.
                                  saved_as_of: Date string in ``%Y-%m-%d`` format.

                              Returns:
                                  Sorted list of matching file names (names only, no directory).

                              Raises:
                                  ValueError: If ``saved_as_of`` is not a ``%Y-%m-%d`` date.
                              """
                              date = datetime.strptime(saved_as_of, '%Y-%m-%d')
                              # Every spelling of the date that may appear in a file name.
                              formatted_dates = (saved_as_of, date.strftime('%d%b%y'))
                              expected_names = {
                                  template.replace("DATE", tag)
                                  for template in base_filename_list
                                  for tag in formatted_dates
                              }
                              # Compare entries with the expected names directly: one directory
                              # scan, and no template is lost to a narrower glob pattern.
                              return sorted(
                                  entry.name
                                  for entry in Path(directory).iterdir()
                                  if entry.name in expected_names
                              )


                          if __name__ == "__main__":
                              today = "2018-07-25"
                              folder = "."
                              files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatoes_DATE.xml', 'tomatoes.DATE.csv']
                              test = get_path_list(folder, files_market, today)
                              print(*test)


                          this produces:



                          apples_2018-07-25.xml tomatos.25Jul18.csv cucumbers_2018-07-25.xml peaches_2018-07-25.xml potatos_2018-07-25.xml





                          share|improve this answer













                          Thank you for the pointers, here is all I could master. not sure how to use the format function the way you intended. I think pathlib glob only accepts str, not a list if i interpreted your answer correctly.



                          from pathlib import Path
                          from datetime import datetime


                          def get_path_list(directory, base_filename_list, saved_as_of):
                              """Return the names of files in ``directory`` matching dated templates.

                              ``DATE`` in each template is replaced by ``saved_as_of`` in every
                              supported spelling: as given (``2018-07-25``) and as ``%d%b%y``
                              (``25Jul18``).

                              Args:
                                  directory: Directory to search (not recursive).
                                  base_filename_list: File-name templates containing ``DATE``.
                                  saved_as_of: Date string in ``%Y-%m-%d`` format.

                              Returns:
                                  Sorted list of matching file names (names only, no directory).

                              Raises:
                                  ValueError: If ``saved_as_of`` is not a ``%Y-%m-%d`` date.
                              """
                              date = datetime.strptime(saved_as_of, '%Y-%m-%d')
                              # Every spelling of the date that may appear in a file name.
                              formatted_dates = (saved_as_of, date.strftime('%d%b%y'))
                              expected_names = {
                                  template.replace("DATE", tag)
                                  for template in base_filename_list
                                  for tag in formatted_dates
                              }
                              # Compare entries with the expected names directly: one directory
                              # scan, and no template is lost to a narrower glob pattern.
                              return sorted(
                                  entry.name
                                  for entry in Path(directory).iterdir()
                                  if entry.name in expected_names
                              )


                          if __name__ == "__main__":
                              today = "2018-07-25"
                              folder = "."
                              files_market = ['apples_DATE.xml', 'peaches_DATE.xml', 'cucumbers_DATE.xml', 'potatoes_DATE.xml', 'tomatoes.DATE.csv']
                              test = get_path_list(folder, files_market, today)
                              print(*test)


                          this produces:



                          apples_2018-07-25.xml tomatos.25Jul18.csv cucumbers_2018-07-25.xml peaches_2018-07-25.xml potatos_2018-07-25.xml






                          share|improve this answer













                          share|improve this answer



                          share|improve this answer











                          answered Aug 1 at 19:25









                          Vrun

                          267




                          267






















                               

                              draft saved


                              draft discarded


























                               


                              draft saved


                              draft discarded














                              StackExchange.ready(
                              function ()
                              StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f200620%2flisting-files-whose-names-match-a-pattern-with-a-date%23new-answer', 'question_page');

                              );

                              Post as a guest













































































                              Popular posts from this blog

                              Greedy Best First Search implementation in Rust

                              Function to Return a JSON Like Objects Using VBA Collections and Arrays

                              C++11 CLH Lock Implementation