lost and found ( for me ? )

DNS : count Top N queries by label with Python

This script counts the top N queries by label.
Here, label means:

- label 0 ( all queries are regarded as “.” )
.

- label 1
.com

- label 2
.foo.com

usage :
python count_queries_by_label_topN.py <label> <top N> <query log>

query log format.
# head -5 a.txt
local.
local.
www.google.com.
ns2.c0.com.
daisy.ubuntu.com.

label 0, top 3
# ./count_queries_by_label_topN.py 0 3 a.txt
('.', 2081)

label 1, top 3
# ./count_queries_by_label_topN.py 1 3 a.txt
('.com', 1126)
('.localdomain', 632)
('.', 7)

label 2, top 3
# ./count_queries_by_label_topN.py 2 3 a.txt
('.ubuntu.com', 576)
('.c0.com', 435)
('.google.com', 106)

Here is the script I wrote.
There must be a more efficient way than this.
# cat count_queries_by_label_topN.py
#!/usr/bin/env python
"""Count the top N DNS query suffixes, grouped by number of trailing labels.

Usage: python count_queries_by_label_topN.py <label> <top N> <query log>

The query log holds one queried name per line (e.g. ``www.google.com.``).
Runs unchanged on Python 2.7 and Python 3.
"""

import sys
from collections import Counter
from operator import itemgetter

# Kept for backward compatibility: after each count_query() call these hold
# the full suffix -> count mapping and the fully ranked (suffix, count) list
# of that call only (they are refreshed, not accumulated).
dict1 = {}
list1 = []


def _suffix(name, label):
    """Return the last ``label`` labels of ``name`` as a dot-prefixed suffix.

    ``name`` must already be stripped of surrounding whitespace.  A single
    trailing root dot is optional, so ``www.google.com.`` and
    ``www.google.com`` both give ``.google.com`` for ``label == 2``.
    ``label == 0`` (or the root name ``.``) gives ``.``.  Names with fewer
    labels than requested are returned whole (``local.`` -> ``.local``).
    """
    if name.endswith("."):
        name = name[:-1]
    labels = name.split(".") if name else []
    # Slice from an explicit start index: labels[-0:] would be the whole list.
    start = max(len(labels) - label, 0)
    return "." + ".".join(labels[start:])


def _rank_suffixes(lines, label):
    """Count the suffix of every non-blank line; return (counts, ranked)."""
    counts = Counter()
    for line in lines:
        name = line.strip()
        if not name:
            # A blank line is not a query; do not count it as the root.
            continue
        counts[_suffix(name, label)] += 1
    # Sort by name first, then stably by count, so ties come out in
    # alphabetical order instead of depending on dict ordering.
    ranked = sorted(sorted(counts.items()), key=itemgetter(1), reverse=True)
    return counts, ranked


def count_query(label, topN, querylog):
    """Print the ``topN`` most frequent query suffixes found in ``querylog``.

    Args:
        label: number of trailing labels to group by (int or numeric string);
            0 groups every query under ``.``.
        topN: maximum number of entries to print (int or numeric string).
        querylog: path to a text file with one queried name per line.

    Each entry is printed as a ``(suffix, count)`` tuple, most frequent
    first.  Fewer than ``topN`` lines are printed when the log has fewer
    distinct suffixes; nothing is printed for an empty log.

    Raises:
        ValueError: if ``label`` is negative, ``topN`` is below one, or
            either is not an integer.
        IOError: if ``querylog`` cannot be opened.
    """
    label = int(label)
    topN = int(topN)
    if label < 0:
        raise ValueError("label must be greater than or equal to zero")
    if topN < 1:
        raise ValueError("top N must be greater than or equal to one")

    with open(querylog, 'r') as f:
        # Iterate the file lazily instead of loading it with readlines().
        counts, ranked = _rank_suffixes(f, label)

    # Refresh (never accumulate) the legacy module-level results.
    dict1.clear()
    dict1.update(counts)
    list1[:] = ranked

    for entry in ranked[:topN]:
        print(entry)


def main(argvs):
    """Validate command-line arguments and run count_query().

    Returns the process exit status: 0 on success, 1 on any usage error.
    """
    if len(argvs) != 4:
        print("Oops :(")
        print("Usage python count_queries_by_label_topN.py <label> <top N> <query file>")
        return 1

    try:
        label = int(argvs[1])
        topN = int(argvs[2])
    except ValueError:
        print("Oops :(")
        print("Please specify label and top N as integers")
        return 1

    if label < 0:
        print("Oops :(")
        print("Please specify label greater than or equal to zero")
        return 1
    if topN < 1:
        print("Oops :(")
        print("Please specify top N greater than or equal to one")
        return 1

    try:
        count_query(label, topN, argvs[3])
    except IOError as err:
        print("Oops :(")
        print(err)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

No comments:

Post a Comment

Note: Only a member of this blog may post a comment.