Twitter data mining project

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;







up vote
4
down vote

favorite












I've written this simple code, which can be found on my GitHub profile (it simply downloads some Twitter data and stores it in an Excel file).



It is one of my first experiments with OOP in Python: the code works (it does its tasks as intended), but maybe an OOP implementation is not strictly needed in this case.



Can I show this code as part of my coding portfolio as it is, or does it need some adjustments?



This is the code:



main.py



from twitterHandler import Twitter_User
import pandas as pd
import threading




if __name__=='__main__':
usersIDS = 'UserName1':1234,'UserName2':1234,
'UserName3':1234,'UserName4':1234
threads =
excel_writer = pd.ExcelWriter("Twitter User's Report.xlsx", engine='openpyxl')

def get_data(user_id): #get best last tweets and store them in a excel file
user = Twitter_User.Twitter_User(user_id,2000)
user.get_tweets()
#print(user.get_tweets())
best_tweets = user.most_liked_rt()
#print(best_tweets)
best_tweets.to_excel(excel_writer, ''.format(user.name),index=False)
excel_writer.save()
excel_writer.close()


for user_name,user_id in usersIDS.items():
try:
t = threading.Thread(target=get_data,args=(user_id,))
threads[user_name] = t
print('Starting to get data for: '.format(user_name))
t.start()
except Exception as e:
print('Something wrong happens: ',e)


for name,t in threads.items():
t.join()
print('Process for Stopped'.format(name))


TwitterUser.py



import tweepy
import pandas as pd
import twitterHandler.twitter_data
import collections


auth = tweepy.OAuthHandler(twitterHandler.twitter_data.consumer_key, twitterHandler.twitter_data.consumer_secret)
auth.set_access_token(twitterHandler.twitter_data.access_token, twitterHandler.twitter_data.access_secret)
api = tweepy.API(auth)


class Twitter_User():
'''Get information about a specific Twitter User'''
def __init__(self, id, count=200):
self.id = id
self.count = count
self.data = None
self.like_average = None
self.rt_average = None
try:
self.user = api.get_user(self.id)
self.name = self._user._json['name']
self.screen_name = self._user._json['screen_name']
self.location = self._user._json['location']
self.description = self._user._json['description']
self.url = self._user._json['url']
self.followers = self._user._json['followers_count']
except tweepy.TweepError as e:
print(e.response.text)
return
except tweepy.RateLimitError:
rate = api.rate_limit_status()
print(rate)
return

def get_tweets(self): #store last n tweets in a dataframe
simple_list =
for status in tweepy.Cursor(api.user_timeline, id=self.id).items(self.count):
array = [status._json["text"].strip(), status._json["favorite_count"],
status._json["created_at"], status._json["retweet_count"],
[h["text"] for h in status._json["entities"]["hashtags"]],status._json["lang"]]
simple_list.append(array)
self.data = pd.DataFrame(simple_list, columns=["Text", "Like", "Created at", "Retweet", "Hashtags","Lang"])
self.data = self.data[~self.data["Text"].str.startswith('RT')]
return self.data


def most_liked_rt(self): #return a df of tweets where the number of like and rt is greater than respective averages
self.like_average = self.data["Like"].mean()
self.rt_average = self.data["Retweet"].mean()
return self.data.loc[(self.data['Like'] > self.like_average) & (self.data['Retweet'] > self.rt_average)]

def count_hashtags(self,df): #give the most used hashtags in the tweets df - to use with the return df of most_liked_rt()
h_tags_cloud =
h_tags = df[['Hashtags', 'Created at']]
h_tags = h_tags[h_tags["Hashtags"].map(len) != 0]
h_tags_list = h_tags["Hashtags"].tolist()
h_tags_counter = collections.Counter()
for h_inner_list in h_tags_list:
for h_element in h_inner_list:
h_tags_cloud.append(h_element)
h_tags_counter.update(h_tags_cloud)
h_tags_df = pd.DataFrame.from_dict(h_tags_counter,orient="index").sort_values(0,ascending=False)
h_tags_df = h_tags_df.rename(columns=0: 'HashTags Freq')
return h_tags_df


TwitterData.py



consumer_key = ''
consumer_secret = ''
access_token = ''
access_secret = ''






share|improve this question



























    up vote
    4
    down vote

    favorite












    I've written this simple code, which can be found on my GitHub profile (it simply downloads some Twitter data and stores it in an Excel file).



    It is one of my first experiments with OOP in Python: the code works (it does its tasks as intended), but maybe an OOP implementation is not strictly needed in this case.



    Can I show this code as part of my coding portfolio as it is, or does it need some adjustments?



    This is the code:



    main.py



    from twitterHandler import Twitter_User
    import pandas as pd
    import threading




    if __name__=='__main__':
    usersIDS = 'UserName1':1234,'UserName2':1234,
    'UserName3':1234,'UserName4':1234
    threads =
    excel_writer = pd.ExcelWriter("Twitter User's Report.xlsx", engine='openpyxl')

    def get_data(user_id): #get best last tweets and store them in a excel file
    user = Twitter_User.Twitter_User(user_id,2000)
    user.get_tweets()
    #print(user.get_tweets())
    best_tweets = user.most_liked_rt()
    #print(best_tweets)
    best_tweets.to_excel(excel_writer, ''.format(user.name),index=False)
    excel_writer.save()
    excel_writer.close()


    for user_name,user_id in usersIDS.items():
    try:
    t = threading.Thread(target=get_data,args=(user_id,))
    threads[user_name] = t
    print('Starting to get data for: '.format(user_name))
    t.start()
    except Exception as e:
    print('Something wrong happens: ',e)


    for name,t in threads.items():
    t.join()
    print('Process for Stopped'.format(name))


    TwitterUser.py



    import tweepy
    import pandas as pd
    import twitterHandler.twitter_data
    import collections


    auth = tweepy.OAuthHandler(twitterHandler.twitter_data.consumer_key, twitterHandler.twitter_data.consumer_secret)
    auth.set_access_token(twitterHandler.twitter_data.access_token, twitterHandler.twitter_data.access_secret)
    api = tweepy.API(auth)


    class Twitter_User():
    '''Get information about a specific Twitter User'''
    def __init__(self, id, count=200):
    self.id = id
    self.count = count
    self.data = None
    self.like_average = None
    self.rt_average = None
    try:
    self.user = api.get_user(self.id)
    self.name = self._user._json['name']
    self.screen_name = self._user._json['screen_name']
    self.location = self._user._json['location']
    self.description = self._user._json['description']
    self.url = self._user._json['url']
    self.followers = self._user._json['followers_count']
    except tweepy.TweepError as e:
    print(e.response.text)
    return
    except tweepy.RateLimitError:
    rate = api.rate_limit_status()
    print(rate)
    return

    def get_tweets(self): #store last n tweets in a dataframe
    simple_list =
    for status in tweepy.Cursor(api.user_timeline, id=self.id).items(self.count):
    array = [status._json["text"].strip(), status._json["favorite_count"],
    status._json["created_at"], status._json["retweet_count"],
    [h["text"] for h in status._json["entities"]["hashtags"]],status._json["lang"]]
    simple_list.append(array)
    self.data = pd.DataFrame(simple_list, columns=["Text", "Like", "Created at", "Retweet", "Hashtags","Lang"])
    self.data = self.data[~self.data["Text"].str.startswith('RT')]
    return self.data


    def most_liked_rt(self): #return a df of tweets where the number of like and rt is greater than respective averages
    self.like_average = self.data["Like"].mean()
    self.rt_average = self.data["Retweet"].mean()
    return self.data.loc[(self.data['Like'] > self.like_average) & (self.data['Retweet'] > self.rt_average)]

    def count_hashtags(self,df): #give the most used hashtags in the tweets df - to use with the return df of most_liked_rt()
    h_tags_cloud =
    h_tags = df[['Hashtags', 'Created at']]
    h_tags = h_tags[h_tags["Hashtags"].map(len) != 0]
    h_tags_list = h_tags["Hashtags"].tolist()
    h_tags_counter = collections.Counter()
    for h_inner_list in h_tags_list:
    for h_element in h_inner_list:
    h_tags_cloud.append(h_element)
    h_tags_counter.update(h_tags_cloud)
    h_tags_df = pd.DataFrame.from_dict(h_tags_counter,orient="index").sort_values(0,ascending=False)
    h_tags_df = h_tags_df.rename(columns=0: 'HashTags Freq')
    return h_tags_df


    TwitterData.py



    consumer_key = ''
    consumer_secret = ''
    access_token = ''
    access_secret = ''






    share|improve this question























      up vote
      4
      down vote

      favorite









      up vote
      4
      down vote

      favorite











      I've written this simple code, which can be found on my GitHub profile (it simply downloads some Twitter data and stores it in an Excel file).



      It is one of my first experiments with OOP in Python: the code works (it does its tasks as intended), but maybe an OOP implementation is not strictly needed in this case.



      Can I show this code as part of my coding portfolio as it is, or does it need some adjustments?



      This is the code:



      main.py



      from twitterHandler import Twitter_User
      import pandas as pd
      import threading




      if __name__=='__main__':
      usersIDS = 'UserName1':1234,'UserName2':1234,
      'UserName3':1234,'UserName4':1234
      threads =
      excel_writer = pd.ExcelWriter("Twitter User's Report.xlsx", engine='openpyxl')

      def get_data(user_id): #get best last tweets and store them in a excel file
      user = Twitter_User.Twitter_User(user_id,2000)
      user.get_tweets()
      #print(user.get_tweets())
      best_tweets = user.most_liked_rt()
      #print(best_tweets)
      best_tweets.to_excel(excel_writer, ''.format(user.name),index=False)
      excel_writer.save()
      excel_writer.close()


      for user_name,user_id in usersIDS.items():
      try:
      t = threading.Thread(target=get_data,args=(user_id,))
      threads[user_name] = t
      print('Starting to get data for: '.format(user_name))
      t.start()
      except Exception as e:
      print('Something wrong happens: ',e)


      for name,t in threads.items():
      t.join()
      print('Process for Stopped'.format(name))


      TwitterUser.py



      import tweepy
      import pandas as pd
      import twitterHandler.twitter_data
      import collections


      auth = tweepy.OAuthHandler(twitterHandler.twitter_data.consumer_key, twitterHandler.twitter_data.consumer_secret)
      auth.set_access_token(twitterHandler.twitter_data.access_token, twitterHandler.twitter_data.access_secret)
      api = tweepy.API(auth)


      class Twitter_User():
      '''Get information about a specific Twitter User'''
      def __init__(self, id, count=200):
      self.id = id
      self.count = count
      self.data = None
      self.like_average = None
      self.rt_average = None
      try:
      self.user = api.get_user(self.id)
      self.name = self._user._json['name']
      self.screen_name = self._user._json['screen_name']
      self.location = self._user._json['location']
      self.description = self._user._json['description']
      self.url = self._user._json['url']
      self.followers = self._user._json['followers_count']
      except tweepy.TweepError as e:
      print(e.response.text)
      return
      except tweepy.RateLimitError:
      rate = api.rate_limit_status()
      print(rate)
      return

      def get_tweets(self): #store last n tweets in a dataframe
      simple_list =
      for status in tweepy.Cursor(api.user_timeline, id=self.id).items(self.count):
      array = [status._json["text"].strip(), status._json["favorite_count"],
      status._json["created_at"], status._json["retweet_count"],
      [h["text"] for h in status._json["entities"]["hashtags"]],status._json["lang"]]
      simple_list.append(array)
      self.data = pd.DataFrame(simple_list, columns=["Text", "Like", "Created at", "Retweet", "Hashtags","Lang"])
      self.data = self.data[~self.data["Text"].str.startswith('RT')]
      return self.data


      def most_liked_rt(self): #return a df of tweets where the number of like and rt is greater than respective averages
      self.like_average = self.data["Like"].mean()
      self.rt_average = self.data["Retweet"].mean()
      return self.data.loc[(self.data['Like'] > self.like_average) & (self.data['Retweet'] > self.rt_average)]

      def count_hashtags(self,df): #give the most used hashtags in the tweets df - to use with the return df of most_liked_rt()
      h_tags_cloud =
      h_tags = df[['Hashtags', 'Created at']]
      h_tags = h_tags[h_tags["Hashtags"].map(len) != 0]
      h_tags_list = h_tags["Hashtags"].tolist()
      h_tags_counter = collections.Counter()
      for h_inner_list in h_tags_list:
      for h_element in h_inner_list:
      h_tags_cloud.append(h_element)
      h_tags_counter.update(h_tags_cloud)
      h_tags_df = pd.DataFrame.from_dict(h_tags_counter,orient="index").sort_values(0,ascending=False)
      h_tags_df = h_tags_df.rename(columns=0: 'HashTags Freq')
      return h_tags_df


      TwitterData.py



      consumer_key = ''
      consumer_secret = ''
      access_token = ''
      access_secret = ''






      share|improve this question













      I've written this simple code, which can be found on my GitHub profile (it simply downloads some Twitter data and stores it in an Excel file).



      It is one of my first experiments with OOP in Python: the code works (it does its tasks as intended), but maybe an OOP implementation is not strictly needed in this case.



      Can I show this code as part of my coding portfolio as it is, or does it need some adjustments?



      This is the code:



      main.py



      from twitterHandler import Twitter_User
      import pandas as pd
      import threading




      if __name__=='__main__':
      usersIDS = 'UserName1':1234,'UserName2':1234,
      'UserName3':1234,'UserName4':1234
      threads =
      excel_writer = pd.ExcelWriter("Twitter User's Report.xlsx", engine='openpyxl')

      def get_data(user_id): #get best last tweets and store them in a excel file
      user = Twitter_User.Twitter_User(user_id,2000)
      user.get_tweets()
      #print(user.get_tweets())
      best_tweets = user.most_liked_rt()
      #print(best_tweets)
      best_tweets.to_excel(excel_writer, ''.format(user.name),index=False)
      excel_writer.save()
      excel_writer.close()


      for user_name,user_id in usersIDS.items():
      try:
      t = threading.Thread(target=get_data,args=(user_id,))
      threads[user_name] = t
      print('Starting to get data for: '.format(user_name))
      t.start()
      except Exception as e:
      print('Something wrong happens: ',e)


      for name,t in threads.items():
      t.join()
      print('Process for Stopped'.format(name))


      TwitterUser.py



      import tweepy
      import pandas as pd
      import twitterHandler.twitter_data
      import collections


      auth = tweepy.OAuthHandler(twitterHandler.twitter_data.consumer_key, twitterHandler.twitter_data.consumer_secret)
      auth.set_access_token(twitterHandler.twitter_data.access_token, twitterHandler.twitter_data.access_secret)
      api = tweepy.API(auth)


      class Twitter_User():
      '''Get information about a specific Twitter User'''
      def __init__(self, id, count=200):
      self.id = id
      self.count = count
      self.data = None
      self.like_average = None
      self.rt_average = None
      try:
      self.user = api.get_user(self.id)
      self.name = self._user._json['name']
      self.screen_name = self._user._json['screen_name']
      self.location = self._user._json['location']
      self.description = self._user._json['description']
      self.url = self._user._json['url']
      self.followers = self._user._json['followers_count']
      except tweepy.TweepError as e:
      print(e.response.text)
      return
      except tweepy.RateLimitError:
      rate = api.rate_limit_status()
      print(rate)
      return

      def get_tweets(self): #store last n tweets in a dataframe
      simple_list =
      for status in tweepy.Cursor(api.user_timeline, id=self.id).items(self.count):
      array = [status._json["text"].strip(), status._json["favorite_count"],
      status._json["created_at"], status._json["retweet_count"],
      [h["text"] for h in status._json["entities"]["hashtags"]],status._json["lang"]]
      simple_list.append(array)
      self.data = pd.DataFrame(simple_list, columns=["Text", "Like", "Created at", "Retweet", "Hashtags","Lang"])
      self.data = self.data[~self.data["Text"].str.startswith('RT')]
      return self.data


      def most_liked_rt(self): #return a df of tweets where the number of like and rt is greater than respective averages
      self.like_average = self.data["Like"].mean()
      self.rt_average = self.data["Retweet"].mean()
      return self.data.loc[(self.data['Like'] > self.like_average) & (self.data['Retweet'] > self.rt_average)]

      def count_hashtags(self,df): #give the most used hashtags in the tweets df - to use with the return df of most_liked_rt()
      h_tags_cloud =
      h_tags = df[['Hashtags', 'Created at']]
      h_tags = h_tags[h_tags["Hashtags"].map(len) != 0]
      h_tags_list = h_tags["Hashtags"].tolist()
      h_tags_counter = collections.Counter()
      for h_inner_list in h_tags_list:
      for h_element in h_inner_list:
      h_tags_cloud.append(h_element)
      h_tags_counter.update(h_tags_cloud)
      h_tags_df = pd.DataFrame.from_dict(h_tags_counter,orient="index").sort_values(0,ascending=False)
      h_tags_df = h_tags_df.rename(columns=0: 'HashTags Freq')
      return h_tags_df


      TwitterData.py



      consumer_key = ''
      consumer_secret = ''
      access_token = ''
      access_secret = ''








      share|improve this question












      share|improve this question




      share|improve this question








      edited Jan 24 at 17:56









      200_success

      123k14143401




      123k14143401









      asked Jan 24 at 13:44









      Lime In The Coconut

      1283




      1283




















          1 Answer
          1






          active

          oldest

          votes

















          up vote
          5
          down vote



          accepted










          I have spent a few moments with your project and I have found a few things which should be improved (in my opinion, of course).




          1. Use pep8 as your code formatting tool. If somebody reviews your code, this will be the first thing she/he notices. There are a lot of empty lines, wrong formatting, etc.


          2. Improve your README.md. Add more info to your project description. "Get some data from a Twitter user" is not enough :) Add some description of your API, and maybe try to add some badges for codecov etc.


          3. Hardcoded values - this is a bad smell. Example:


          usersIDS = 'UserName1':1234,'UserName2':1234,
          'UserName3':1234,'UserName4':1234





          1. Use a logger - using print is fine, but if you want to look more professional, use a logger ;) Also add more descriptive messages.


          2. Too broad exceptions - I know, sometimes this is unavoidable, but it looks like it can be improved in a few places. For example, line 31 in main.py. This looks weird to me, and I would check what exactly can break here.

          3. Remove commented-out code - please take care with what you show other people.


          4. Add unit tests - this is very, very important. Add some unit tests. The best thing would be to use TDD to create your app (knowing what TDD is matters a lot in a job interview).


          5. Add more descriptive comments



          6. Naming of variables - I know, this is very, very hard, but you should not use names like h_tags.

          Also:
          What is this:




          h_tags = h_tags[h_tags["Hashtags"].map(len) != 0]




          It looks like a bug. Even if this does what it should do, it looks very weird and in my opinion should be refactored into something more readable.



          Also, your constructor looks weird. Catching exceptions in a constructor is kind of a bad smell.






          share|improve this answer





















            Your Answer




            StackExchange.ifUsing("editor", function ()
            return StackExchange.using("mathjaxEditing", function ()
            StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
            StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
            );
            );
            , "mathjax-editing");

            StackExchange.ifUsing("editor", function ()
            StackExchange.using("externalEditor", function ()
            StackExchange.using("snippets", function ()
            StackExchange.snippets.init();
            );
            );
            , "code-snippets");

            StackExchange.ready(function()
            var channelOptions =
            tags: "".split(" "),
            id: "196"
            ;
            initTagRenderer("".split(" "), "".split(" "), channelOptions);

            StackExchange.using("externalEditor", function()
            // Have to fire editor after snippets, if snippets enabled
            if (StackExchange.settings.snippets.snippetsEnabled)
            StackExchange.using("snippets", function()
            createEditor();
            );

            else
            createEditor();

            );

            function createEditor()
            StackExchange.prepareEditor(
            heartbeatType: 'answer',
            convertImagesToLinks: false,
            noModals: false,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
            );



            );








             

            draft saved


            draft discarded


















            StackExchange.ready(
            function ()
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f185876%2ftwitter-data-mining-project%23new-answer', 'question_page');

            );

            Post as a guest






























            1 Answer
            1






            active

            oldest

            votes








            1 Answer
            1






            active

            oldest

            votes









            active

            oldest

            votes






            active

            oldest

            votes








            up vote
            5
            down vote



            accepted










            I have spent a few moments with your project and I have found a few things which should be improved (in my opinion, of course).




            1. Use pep8 as your code formatting tool. If somebody reviews your code, this will be the first thing she/he notices. There are a lot of empty lines, wrong formatting, etc.


            2. Improve your README.md. Add more info to your project description. "Get some data from a Twitter user" is not enough :) Add some description of your API, and maybe try to add some badges for codecov etc.


            3. Hardcoded values - this is a bad smell. Example:


            usersIDS = 'UserName1':1234,'UserName2':1234,
            'UserName3':1234,'UserName4':1234





            1. Use a logger - using print is fine, but if you want to look more professional, use a logger ;) Also add more descriptive messages.


            2. Too broad exceptions - I know, sometimes this is unavoidable, but it looks like it can be improved in a few places. For example, line 31 in main.py. This looks weird to me, and I would check what exactly can break here.

            3. Remove commented-out code - please take care with what you show other people.


            4. Add unit tests - this is very, very important. Add some unit tests. The best thing would be to use TDD to create your app (knowing what TDD is matters a lot in a job interview).


            5. Add more descriptive comments



            6. Naming of variables - I know, this is very, very hard, but you should not use names like h_tags.

            Also:
            What is this:




            h_tags = h_tags[h_tags["Hashtags"].map(len) != 0]




            It looks like a bug. Even if this does what it should do, it looks very weird and in my opinion should be refactored into something more readable.



            Also, your constructor looks weird. Catching exceptions in a constructor is kind of a bad smell.






            share|improve this answer

























              up vote
              5
              down vote



              accepted










              I have spent a few moments with your project and I have found a few things which should be improved (in my opinion, of course).




              1. Use pep8 as your code formatting tool. If somebody reviews your code, this will be the first thing she/he notices. There are a lot of empty lines, wrong formatting, etc.


              2. Improve your README.md. Add more info to your project description. "Get some data from a Twitter user" is not enough :) Add some description of your API, and maybe try to add some badges for codecov etc.


              3. Hardcoded values - this is a bad smell. Example:


              usersIDS = 'UserName1':1234,'UserName2':1234,
              'UserName3':1234,'UserName4':1234





              1. Use a logger - using print is fine, but if you want to look more professional, use a logger ;) Also add more descriptive messages.


              2. Too broad exceptions - I know, sometimes this is unavoidable, but it looks like it can be improved in a few places. For example, line 31 in main.py. This looks weird to me, and I would check what exactly can break here.

              3. Remove commented-out code - please take care with what you show other people.


              4. Add unit tests - this is very, very important. Add some unit tests. The best thing would be to use TDD to create your app (knowing what TDD is matters a lot in a job interview).


              5. Add more descriptive comments



              6. Naming of variables - I know, this is very, very hard, but you should not use names like h_tags.

              Also:
              What is this:




              h_tags = h_tags[h_tags["Hashtags"].map(len) != 0]




              It looks like a bug. Even if this does what it should do, it looks very weird and in my opinion should be refactored into something more readable.



              Also, your constructor looks weird. Catching exceptions in a constructor is kind of a bad smell.






              share|improve this answer























                up vote
                5
                down vote



                accepted







                up vote
                5
                down vote



                accepted






                I have spent a few moments with your project and I have found a few things which should be improved (in my opinion, of course).




                1. Use pep8 as your code formatting tool. If somebody reviews your code, this will be the first thing she/he notices. There are a lot of empty lines, wrong formatting, etc.


                2. Improve your README.md. Add more info to your project description. "Get some data from a Twitter user" is not enough :) Add some description of your API, and maybe try to add some badges for codecov etc.


                3. Hardcoded values - this is a bad smell. Example:


                usersIDS = 'UserName1':1234,'UserName2':1234,
                'UserName3':1234,'UserName4':1234





                1. Use a logger - using print is fine, but if you want to look more professional, use a logger ;) Also add more descriptive messages.


                2. Too broad exceptions - I know, sometimes this is unavoidable, but it looks like it can be improved in a few places. For example, line 31 in main.py. This looks weird to me, and I would check what exactly can break here.

                3. Remove commented-out code - please take care with what you show other people.


                4. Add unit tests - this is very, very important. Add some unit tests. The best thing would be to use TDD to create your app (knowing what TDD is matters a lot in a job interview).


                5. Add more descriptive comments



                6. Naming of variables - I know, this is very, very hard, but you should not use names like h_tags.

                Also:
                What is this:




                h_tags = h_tags[h_tags["Hashtags"].map(len) != 0]




                It looks like a bug. Even if this does what it should do, it looks very weird and in my opinion should be refactored into something more readable.



                Also, your constructor looks weird. Catching exceptions in a constructor is kind of a bad smell.






                share|improve this answer













                I have spent a few moments with your project and I have found a few things which should be improved (in my opinion, of course).




                1. Use pep8 as your code formatting tool. If somebody reviews your code, this will be the first thing she/he notices. There are a lot of empty lines, wrong formatting, etc.


                2. Improve your README.md. Add more info to your project description. "Get some data from a Twitter user" is not enough :) Add some description of your API, and maybe try to add some badges for codecov etc.


                3. Hardcoded values - this is a bad smell. Example:


                usersIDS = 'UserName1':1234,'UserName2':1234,
                'UserName3':1234,'UserName4':1234





                1. Use a logger - using print is fine, but if you want to look more professional, use a logger ;) Also add more descriptive messages.


                2. Too broad exceptions - I know, sometimes this is unavoidable, but it looks like it can be improved in a few places. For example, line 31 in main.py. This looks weird to me, and I would check what exactly can break here.

                3. Remove commented-out code - please take care with what you show other people.


                4. Add unit tests - this is very, very important. Add some unit tests. The best thing would be to use TDD to create your app (knowing what TDD is matters a lot in a job interview).


                5. Add more descriptive comments



                6. Naming of variables - I know, this is very, very hard, but you should not use names like h_tags.

                Also:
                What is this:




                h_tags = h_tags[h_tags["Hashtags"].map(len) != 0]




                It looks like a bug. Even if this does what it should do, it looks very weird and in my opinion should be refactored into something more readable.



                Also, your constructor looks weird. Catching exceptions in a constructor is kind of a bad smell.







                share|improve this answer













                share|improve this answer



                share|improve this answer











                answered Jan 24 at 14:18









                Mariusz

                1613




                1613






















                     

                    draft saved


                    draft discarded


























                     


                    draft saved


                    draft discarded














                    StackExchange.ready(
                    function ()
                    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f185876%2ftwitter-data-mining-project%23new-answer', 'question_page');

                    );

                    Post as a guest













































































                    Popular posts from this blog

                    Chat program with C++ and SFML

                    Function to Return a JSON Like Objects Using VBA Collections and Arrays

                    Will my employers contract hold up in court?