pastebin - collaborative debugging

pastebin is a collaborative debugging tool allowing you to share and modify code snippets while chatting on IRC, IM or a message board.

This site is developed to XHTML and CSS2 W3C standards. If you see this paragraph, your browser does not support those standards and you need to upgrade. Visit WaSP for a variety of options.

mefi private pastebin - collaborative debugging tool | What's a private pastebin?


Posted by signal on Wed 23 Jul 04:17
report abuse | download | new post

  1. def MeFiTags(path='tagdata_mefi.txt'):
  2.  
  3.     tags = {}
  4.     taglist = {}
  5.     data = open(path)
  6.     data.readline()
  7.     data.readline()
  8.     freq_max = 0
  9.     factor_min = 0.01
  10.     factor_max = 1.0
  11.     for line in data:
  12.         (tag_id, link_id, link_date, tag_name) = line.split('\t')
  13.         tag = tag_name[:-1].lower()
  14.         if "brokenlink" in tag:
  15.             continue
  16.         tags.setdefault(link_id,[])
  17.         tags[link_id].append(tag)
  18.         frequency = taglist.get(tag,0)
  19.         if frequency>freq_max:
  20.             freq_max = frequency
  21.         taglist[tag] = frequency + 1
  22.  
  23.  
  24.     computed_min = freq_max * factor_min
  25.     computed_max = freq_max * factor_max
  26.  
  27.     taglist_inrange = []
  28.  
  29.     ct = 0
  30.     for tag in taglist:
  31.         if computed_min < taglist[tag] < computed_max:
  32.             taglist_inrange.append(tag)
  33.             ct += 1
  34.         elif taglist[tag]>computed_max:
  35.             print tag, taglist[tag]
  36.     print "total tags in range", ct
  37.  
  38.     # 'tags' dict link_id => list of tag_names
  39.     # 'taglist_inrange' list all tags within max and min
  40.     return tags, taglist_inrange
  41.  
  42.  
  43.  
  44.  
  45. def MeFiUsers(path='usernames.txt'):
  46.  
  47.     users ={}
  48.     data = open(path)
  49.     data.readline()
  50.     data.readline()
  51.  
  52.     for line in data:
  53.         (userid, joindate, name)= line.split('\t')
  54.         users[userid]=name[:-1]
  55.  
  56.     # dict userid => username
  57.     return users
  58.  
  59. def MeFiPosts(path='postdata_mefi.txt'):
  60.     # dict
  61.     posts = {}
  62.     min_posts = 5
  63.     data = open(path)
  64.     # jump 2 lines
  65.     data.readline()
  66.     data.readline()
  67.  
  68.     for line in data:
  69.         (postid, userid)=line.split('\t')[:2]
  70.         posts.setdefault(userid,[])
  71.         posts[userid].append(postid)
  72.  
  73.     posts_valid = {}
  74.     for userid in posts:
  75.         if len(posts[userid]) > min_posts:
  76.             posts_valid[userid]=posts[userid]
  77.  
  78.     # dict userid => list of postids
  79.     return posts_valid
  80.  
  81.  
  82. def writeMefiUserTags():
  83.     tags, taglist = MeFiTags()
  84.     users = MeFiUsers()
  85.     posts = MeFiPosts()
  86.  
  87.     usertags = {}
  88.  
  89.  
  90.     out = file('mefidata.txt','w')
  91.     w = out.write
  92.     w('User')
  93.     for tag in taglist:
  94.         w('\t%s' % tag)
  95.     w('\n')
  96.  
  97.     userct = ct = 0
  98.  
  99.     for user_id in users:
  100.         if posts.has_key(user_id):      # if user has posted
  101.             w(users[user_id])           # write user name
  102.             user_posts = posts[user_id] # get user's posts
  103.             user_tags = {}
  104.             for post_id in user_posts:
  105.                 if tags.has_key(post_id):       # if post has tags
  106.                     for tag in tags[post_id]:   # for each tag
  107.                         user_tags.setdefault(tag,0)
  108.                         user_tags[tag]+=1       # increment user's score
  109.             for tag in taglist:
  110.                 if user_tags.has_key(tag):      # if user has tag
  111.                     w('\t%d' % user_tags[tag])   # print score
  112.                 else:
  113.                     w('\t0')                    # print 0
  114.             w('\n')
  115.             if ct%100 == 0:
  116.                 print "%s users written" % ct
  117.             ct += 1
  118.             userct += 1
  119.     out.close()
  120.     print userct, "users"
  121.  
  122. if __name__ == "__main__":
  123.    writeMefiUserTags()

Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:

To highlight particular lines, prefix each line with @@


Remember me