In this project, we will be analyzing the popular technology site Hacker News. It is a site started by the startup incubator Y Combinator, where user-submitted stories (known as "posts") are voted and commented upon, similar to Reddit.
We will be using a dataset that contains all the posts that received comments. Below is the data dictionary for the dataset:
id: The unique identifier from Hacker News for the post
title: The title of the post
url: The URL that the post links to, if the post has a URL
num_points: The number of points the post acquired, calculated as the total number of upvotes minus the total number of downvotes
num_comments: The number of comments that were made on the post
author: The username of the person who submitted the post
created_at: The date and time at which the post was submitted
We can use this dataset to find some interesting insights. For instance, users submit Ask HN posts to ask the Hacker News community a specific question, or Show HN posts to show the community a project, a product, or just generally something interesting.
We can also compare these two types of posts to determine the following:
Do Ask HN or Show HN posts receive more comments on average?
First, let's load the dataset and take a glimpse of it.
from csv import reader

# Load the dataset into a list of lists
with open("hacker_news.csv") as f:
    hn = list(reader(f))

print('The first five rows of HN dataset')
print(hn[:5])

# Separate the header row from the data rows
headers = hn[0]
hn = hn[1:]
print('The header for HN dataset', headers)
print('First five rows of HN dataset after cleaning', hn[:5])
The first five rows of HN dataset [['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'], ['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01']] The header for HN dataset ['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'] First five rows of HN dataset after cleaning [['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01'], ['10301696', 'Note by Note: The Making of Steinway L1037 (2007)', 'http://www.nytimes.com/2007/11/07/movies/07stein.html?_r=0', '8', '2', 'walterbell', '9/30/2015 4:12']]
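As a side note, the same file could also be read with csv.DictReader so that columns are accessed by name rather than by index. This is only a sketch of an alternative, assuming the same hacker_news.csv file; the rest of the project keeps the list-of-lists approach above.
import csv

# Hypothetical alternative: read each row as a dictionary keyed by the header names
with open("hacker_news.csv") as f:
    hn_dicts = list(csv.DictReader(f))

# Columns can then be referenced by name instead of position
print(hn_dicts[0]["title"], hn_dicts[0]["num_comments"])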
Now that we've removed the header row, let's explore the first question. For this, we will create three separate lists: one for ask_posts, one for show_posts, and one for other_posts. Then we will calculate the total number of posts in each of these lists.
ask_posts = []
show_posts = []
other_posts = []

# Classify each post by the prefix of its title
for row in hn:
    title = row[1]
    if title.lower().startswith('ask hn'):
        ask_posts.append(row)
    elif title.lower().startswith('show hn'):
        show_posts.append(row)
    else:
        other_posts.append(row)

print('The number of ask posts: ', len(ask_posts))
print('The number of show posts: ', len(show_posts))
print('The number of other posts: ', len(other_posts))
The number of ask posts: 1744
The number of show posts: 1162
The number of other posts: 17194
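As a quick sanity check (a small sketch added here, not part of the original analysis), the three lists should together account for every row in hn:
# The three categories should cover the whole dataset
assert len(ask_posts) + len(show_posts) + len(other_posts) == len(hn)
print('Total posts accounted for:', len(ask_posts) + len(show_posts) + len(other_posts))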
Next, let's determine whether ask posts or show posts receive more comments on average.
######### Avg number of comments in ask HN posts ##########
total_ask_comments = 0
for row in ask_posts:
    total_ask_comments += int(row[4])

avg_ask_comments = total_ask_comments / len(ask_posts)
print('The average no. of comments for Ask HN posts:', avg_ask_comments)

######### Avg number of comments in show HN posts ##########
total_show_comments = 0
for row in show_posts:
    total_show_comments += int(row[4])

avg_show_comments = total_show_comments / len(show_posts)
print('The average no. of comments for Show HN posts:', avg_show_comments)
The average no. of comments for Ask HN posts: 14.038417431192661
The average no. of comments for Show HN posts: 10.31669535283993
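The two loops above follow the same pattern, so they could be folded into one helper. The sketch below (avg_comments is a name introduced here, not part of the original code) would reproduce the same averages, assuming num_comments stays at index 4.
def avg_comments(posts):
    # Sum the num_comments column (index 4) and divide by the number of posts
    total = sum(int(row[4]) for row in posts)
    return total / len(posts)

print('The average no. of comments for Ask HN posts:', avg_comments(ask_posts))
print('The average no. of comments for Show HN posts:', avg_comments(show_posts))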
Since ask posts are more likely to receive comments, we'll focus our remaining analysis just on these posts. Next, we'll determine if ask posts created at a certain time are more likely to attract comments.
We'll use the following steps to perform this analysis:
Step 1: Calculate the number of ask posts created during each hour of the day, along with the number of comments those posts received.
Step 2: Calculate the average number of comments ask posts receive for each hour they were created.
Let's proceed with Step 1. To do this, we will first create a list of lists, with each inner list containing two elements: the date and time the post was created, and the number of comments it received. For this list we keep the created_at column as it is; the hour will be extracted later.
Using this list of lists, we can then create two dictionaries. The first dictionary will keep track of the number of ask posts created during each hour, and the second will keep track of the number of comments received for that corresponding hour.
Finally, we will use these dictionaries to calculate the average number of comments per post for each hour of creation.
import datetime as dt

# Build a list of [created_at, num_comments] pairs for the ask posts
result_list = []
for row in ask_posts:
    created_at = row[6]
    num_comments = int(row[4])
    result_list.append([created_at, num_comments])

# Count posts and comments per hour of creation
counts_by_hour = {}
comments_by_hour = {}
for row in result_list:
    created_dt = dt.datetime.strptime(row[0], "%m/%d/%Y %H:%M")
    hr = created_dt.strftime("%H")
    if hr not in counts_by_hour:
        counts_by_hour[hr] = 1
        comments_by_hour[hr] = row[1]
    else:
        counts_by_hour[hr] += 1
        comments_by_hour[hr] += row[1]

print(comments_by_hour)
{'21': 1745, '09': 251, '16': 1814, '04': 337, '07': 267, '10': 793, '06': 397, '08': 492, '14': 1416, '12': 687, '17': 1146, '23': 543, '03': 421, '05': 464, '20': 1722, '01': 683, '22': 479, '02': 1381, '18': 1439, '00': 447, '15': 4477, '13': 1253, '11': 641, '19': 1188}
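To make the parsing step clearer, here is a small illustration of what strptime and strftime do with one timestamp taken from the rows printed earlier; the format string "%m/%d/%Y %H:%M" matches the created_at values in this dataset.
import datetime as dt

# Parse one created_at string into a datetime object, then pull out just the hour
example = dt.datetime.strptime("8/4/2016 11:52", "%m/%d/%Y %H:%M")
print(example)                 # 2016-08-04 11:52:00
print(example.strftime("%H"))  # '11'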
Now, let's proceed with Step 2.
# Average comments per post for each hour of creation
avg_by_hour = []
for h in comments_by_hour:
    avg_by_hour.append([h, comments_by_hour[h] / counts_by_hour[h]])

print(avg_by_hour)
[['21', 16.009174311926607], ['09', 5.5777777777777775], ['16', 16.796296296296298], ['04', 7.170212765957447], ['07', 7.852941176470588], ['10', 13.440677966101696], ['06', 9.022727272727273], ['08', 10.25], ['14', 13.233644859813085], ['12', 9.41095890410959], ['17', 11.46], ['23', 7.985294117647059], ['03', 7.796296296296297], ['05', 10.08695652173913], ['20', 21.525], ['01', 11.383333333333333], ['22', 6.746478873239437], ['02', 23.810344827586206], ['18', 13.20183486238532], ['00', 8.127272727272727], ['15', 38.5948275862069], ['13', 14.741176470588234], ['11', 11.051724137931034], ['19', 10.8]]
Although we now have the results we need, this format makes it hard to identify the hours with the highest values. Let's finish by sorting the list of lists and printing the five highest values in a format that's easier to read.
# Swap the columns so the average comes first, which lets us sort by it
swap_avg_by_hour = []
for row in avg_by_hour:
    swap_avg_by_hour.append([row[1], row[0]])

print(swap_avg_by_hour)
sorted_swap = sorted(swap_avg_by_hour, reverse=True)
[[16.009174311926607, '21'], [5.5777777777777775, '09'], [16.796296296296298, '16'], [7.170212765957447, '04'], [7.852941176470588, '07'], [13.440677966101696, '10'], [9.022727272727273, '06'], [10.25, '08'], [13.233644859813085, '14'], [9.41095890410959, '12'], [11.46, '17'], [7.985294117647059, '23'], [7.796296296296297, '03'], [10.08695652173913, '05'], [21.525, '20'], [11.383333333333333, '01'], [6.746478873239437, '22'], [23.810344827586206, '02'], [13.20183486238532, '18'], [8.127272727272727, '00'], [38.5948275862069, '15'], [14.741176470588234, '13'], [11.051724137931034, '11'], [10.8, '19']]
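Swapping the columns works, but the same ranking could also be produced directly with a key function. This is just an alternative sketch; the project continues with sorted_swap below.
# Alternative: sort avg_by_hour by the average (second element) without swapping
top_hours = sorted(avg_by_hour, key=lambda row: row[1], reverse=True)
print(top_hours[:5])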
Now, let's see the Top 5 Hours for Ask Posts Comments.
# Print the five hours with the highest average comments per post
for row in sorted_swap[:5]:
    print("{}:00: {:.2f} average comments per post".format(row[1], row[0]))
15:00: 38.59 average comments per post
02:00: 23.81 average comments per post
20:00: 21.52 average comments per post
16:00: 16.80 average comments per post
21:00: 16.01 average comments per post
Thus, posts created at 15:00 receive approximately 39 comments per post on average. The hours 15:00, 02:00, 20:00, 16:00, and 21:00 are therefore the hours during which one should create a post to have a higher chance of receiving comments on it.
In this project, we analyzed the Hacker News dataset. Our findings indicate that Ask HN posts receive more comments per post, on average, than Show HN posts. Thus, if you want a first opinion, a second opinion, or simply to market your ideas, Hacker News is the place. We also found that the top three hours of the day for comments are 15:00, 02:00, and 20:00; posting at these times is more likely to attract comments, and you can expect at least 20 comments on average for posts made during those hours.