代码:
# cat pv_hour.py
#!/usr/bin/env python
# coding=utf-8
"""Count nginx access-log entries (page views) per hour of day with mrjob."""
from mrjob.job import MRJob
from nginx_accesslog_parser import NginxLineParser


class PvDay(MRJob):
    """MapReduce job that counts access-log lines grouped by hour.

    Despite the class name, the grouping key is the hour field of each
    entry's local time, so the output is one count per hour.
    """

    # A single parser instance shared by every mapper call; parse() is
    # invoked once per line and the result is read back from its attributes.
    nginx_line_parser = NginxLineParser()

    def mapper(self, _, line):
        """Emit ``(hour, 1)`` for one access-log line.

        The input key is ignored; ``line`` is one raw log line.
        """
        self.nginx_line_parser.parse(line)
        # Assumes str(time_local) has the form "<date> <time>" (exactly two
        # whitespace-separated fields) -- TODO confirm against the parser.
        _date, clock = str(self.nginx_line_parser.time_local).split()
        # Only the hour is needed: take the text before the first ':' so
        # that any extra ':' later in the time field cannot break unpacking.
        hour = clock.split(':', 1)[0]
        yield hour, 1  # one hit for this hour

    def reducer(self, key, values):
        """Sum the per-line counts for one hour and emit ``(hour, total)``."""
        yield key, sum(values)


def main():
    """Run the job using mrjob's command-line entry point."""
    PvDay.run()


if __name__ == '__main__':
    main()
执行结果
# python3 pv_hour.py access_all.log-20161227
No configs found; falling back on auto-configuration
Creating temp directory /tmp/pv_hour.root.20161228.025503.341576
Running step 1 of 1...
Streaming final output from /tmp/pv_hour.root.20161228.025503.341576/output...
"14" 21158
"15" 20958
"16" 16080
"17" 14194
"18" 13114
"19" 16898
"20" 18870
"21" 14067
"22" 14053
"23" 12683
"00" 13185
"01" 14785
"02" 12449
"03" 7364
"04" 3628
"05" 9074
"06" 9317
"07" 11887
"08" 13492
"09" 19564
"10" 18390
"11" 15697
"12" 17518
"13" 18785
Removing temp directory /tmp/pv_hour.root.20161228.025503.341576...