{"id":980,"date":"2018-09-19T09:41:54","date_gmt":"2018-09-19T01:41:54","guid":{"rendered":"http:\/\/www.rain1024.com\/?p=980"},"modified":"2023-08-07T20:57:25","modified_gmt":"2023-08-07T12:57:25","slug":"article134","status":"publish","type":"post","link":"http:\/\/rain1024.com\/index.php\/2018\/09\/19\/article134\/","title":{"rendered":"\u4f7f\u7528MapReduce\u5bf9\u65e5\u5fd7\u6570\u636e\u8fdb\u884c\u7b80\u5355\u7684\u6e05\u7406\u548c\u603b\u7ed3"},"content":{"rendered":"<h1>\u4f7f\u7528MapReduce\u548cspark\u5bf9\u65e5\u5fd7\u6570\u636e\u8fdb\u884c\u7b80\u5355\u7684\u6e05\u7406\u548c\u603b\u7ed3<\/h1>\n<h2>\u9996\u5148\u4f7f\u7528MapReduce\u5bf9\u65e5\u5fd7\u8fdb\u884c\u5206\u5272\uff0c\u5c06time\uff0cip\uff0curl\u63d0\u53d6\u51fa\u6765\uff0c\u518d\u7528reduce\u8fdb\u884c\u4e00\u4e2a\u6574\u5408\uff0c\u6839\u636eip\u5730\u5740\u7684\u51fa\u73b0\u6b21\u6570\uff0c\u6253\u5370\u5230HDFS\u4e2d\u3002\u5728\u6574\u5408\u4e2d\u6211\u4f7f\u7528\u4e86bean\u7ed3\u6784\u6765\u5b58\u50a8\u6570\u636e\uff0cbean\u5b9e\u73b0\u4e86WritableComparable\u63a5\u53e3\u3002<\/h2>\n<h2>\u4f7f\u7528\u65f6\u5148\u5c06BaiduLog\u548cLogBean\u4e24\u4e2a\u7c7b\u5bfc\u5165\u9879\u76ee\uff0c\u5e76\u914d\u7f6e\u76f8\u5e94\u7684Maven\u4f9d\u8d56\uff0c\u7136\u540e\u5bfc\u51fa\u9879\u76ee\u7684jar\u5230\u865a\u62df\u673a\u4e2d\uff0c\u5c06\u65e5\u5fd7\u6587\u4ef6\u4e0a\u4f20\u5230HDFS\u4e2d\uff0c\u4f7f\u7528\u547d\u4ee4\u8fd0\u884c\uff1a<\/h2>\n<p><code>hadoop jar rain-hadoop-1.0-SNAPSHOT.jar com.rain.mapreduce.BaiduLog \/data\/baidu.log \/data\/log\/clean5<\/code><\/p>\n<h2>BaiduLog.java<\/h2>\n<pre><code><br \/>import org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.fs.Path;\nimport org.apache.hadoop.io.IntWritable;\nimport org.apache.hadoop.io.LongWritable;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.mapreduce.Job;\nimport org.apache.hadoop.mapreduce.Mapper;\nimport org.apache.hadoop.mapreduce.Reducer;\nimport 
org.apache.hadoop.mapreduce.lib.input.FileInputFormat;\nimport org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;\n\nimport java.io.IOException;\n\npublic class BaiduLog {\n    public static class BaiduLogMapper extends Mapper&lt;LongWritable,Text, Text, LogBean&gt; {\n        @Override\n        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {\n\/\/            super.map(key, value, context);\n            String log = value.toString();\n            String str = \"(cn.baidu.core.inteceptor.LogInteceptor:55)\";\n            if (log.indexOf(str)!=-1){\n                String[] log_arr = log.split(str);\n                String time = log_arr[0].substring(1, 10);\n                String[] log_arr2 = log_arr[1].split(\"\\t\");\n                String ip = log_arr2[1];\n                String url = log_arr2[2];\n                if (url.equals(\"null\")){\n                    url = log_arr2[3];\n                }\n                LogBean logbean = new LogBean(time,ip,url);\n                context.write(new Text(ip),logbean);\n            }\n        }\n    }\n    public static class BaiduLogReducer extends Reducer&lt;Text,LogBean,IntWritable,Text&gt;{\n\n        @Override\n        protected void reduce(Text key, Iterable&lt;LogBean&gt; values, Context context) throws IOException, InterruptedException {\n\/\/            super.reduce(key, values, context);\n            int sum = 0;\n\n            StringBuffer str = new StringBuffer();\n            int flag = 0;\n            for (LogBean logbean:values){\n                sum++;\n                if (flag==0){\n                    str.append(logbean.toString());\n                    flag = 1;\n                }\n            }\n            context.write(new IntWritable(sum),new Text(str.toString()));\n\n        }\n    }\n    public static void main(String[] args) throws Exception {\n        Configuration conf = new Configuration();\n        Job job = 
Job.getInstance(conf, \"avg\");\n\n        job.setJarByClass(BaiduLog.class);\n        job.setMapperClass(BaiduLog.BaiduLogMapper.class);\n        job.setReducerClass(BaiduLog.BaiduLogReducer.class);\n\n\/\/        job.setCombinerClass(BaiduLog.BaiduLogReducer.class);\n\n        job.setOutputKeyClass(Text.class);\n        job.setOutputValueClass(LogBean.class);\n\n        FileInputFormat.addInputPath(job,new Path(args[0]));\n        FileOutputFormat.setOutputPath(job,new Path(args[1]));\n        System.exit(job.waitForCompletion(true)?0:1);\n    }\n\n}\n\n<\/code><\/pre>\n<h2>LogBean.java<\/h2>\n<pre><code><br \/>import org.apache.hadoop.io.Writable;\nimport org.apache.hadoop.io.WritableComparable;\n\nimport java.io.DataInput;\nimport java.io.DataOutput;\nimport java.io.IOException;\n\npublic class LogBean implements WritableComparable&lt;LogBean&gt; {\n    private String time;\n    private String ip;\n    private String url;\n\n    public LogBean() {\n        super();\n    }\n\n    public LogBean(String time, String ip, String url) {\n        this.time = time;\n        this.ip = ip;\n        this.url = url;\n    }\n\n    @Override\n    public String toString() {\n        return \"LogBean{\" +\n                \"time='\" + time + ' ' +\n                \", ip='\" + ip + ' ' +\n                \", url='\" + url + ' ' +\n                '}';\n    }\n\n    public String getTime() {\n        return time;\n    }\n\n    public void setTime(String time) {\n        this.time = time;\n    }\n\n    public String getIp() {\n        return ip;\n    }\n\n    public void setIp(String ip) {\n        this.ip = ip;\n    }\n\n    public String getUrl() {\n        return url;\n    }\n\n    public void setUrl(String url) {\n        this.url = url;\n    }\n\n\n    @Override\n    public int compareTo(LogBean o) {\n        return 0;\n    }\n\n    @Override\n    public void write(DataOutput out) throws IOException {\n        out.writeUTF(time);\n        out.writeUTF(ip);\n        
out.writeUTF(url);\n\n    }\n\n    @Override\n    public void readFields(DataInput in) throws IOException {\n        time = in.readUTF();\n        ip = in.readUTF();\n        url = in.readUTF();\n    }\n}\n\n\n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u4f7f\u7528MapReduce\u548cspark\u5bf9\u65e5\u5fd7\u6570\u636e\u8fdb\u884c\u7b80\u5355\u7684\u6e05\u7406\u548c\u603b\u7ed3 \u9996\u5148\u4f7f\u7528MapReduce\u5bf9\u65e5\u5fd7\u8fdb\u884c\u5206\u5272\uff0c\u5c06\u2026 <span class=\"read-more\"><a href=\"http:\/\/rain1024.com\/index.php\/2018\/09\/19\/article134\/\">Read More &raquo;<\/a><\/span><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[3,9],"tags":[67],"class_list":["post-980","post","type-post","status-publish","format-standard","hentry","category-hadoop","category-mapreduce","tag-67"],"_links":{"self":[{"href":"http:\/\/rain1024.com\/index.php\/wp-json\/wp\/v2\/posts\/980","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/rain1024.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/rain1024.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/rain1024.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/rain1024.com\/index.php\/wp-json\/wp\/v2\/comments?post=980"}],"version-history":[{"count":1,"href":"http:\/\/rain1024.com\/index.php\/wp-json\/wp\/v2\/posts\/980\/revisions"}],"predecessor-version":[{"id":1398,"href":"http:\/\/rain1024.com\/index.php\/wp-json\/wp\/v2\/posts\/980\/revisions\/1398"}],"wp:attachment":[{"href":"http:\/\/rain1024.com\/index.php\/wp-json\/wp\/v2\/media?parent=980"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/rain1024.com\/index.php\/wp-json\/wp\/v2\/categories?post=980"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/rain1024.
com\/index.php\/wp-json\/wp\/v2\/tags?post=980"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}