这是 Spark 高级算子讲解的最后一个章节,今天我们来介绍几个简单的算子:
// countByKey / countByValue
// NOTE(review): assumes `sc` is the SparkContext predefined by spark-shell — confirm.
val rdd1 = sc.parallelize(List(("a", 1), ("b", 2), ("b", 2), ("c", 2), ("c", 1)))

// countByKey: number of elements per key, returned to the driver as a Map.
// For this data: a -> 1, b -> 2, c -> 2.
rdd1.countByKey()

// countByValue: number of occurrences of each distinct element; here an element is
// the whole (key, value) pair, so ("b", 2) is counted twice and the others once.
rdd1.countByValue()
// -------------------------------------------------------------------------------------------
// -------------------------------------------------------------------------------------------
// filterByRange
// NOTE(review): assumes `sc` is the SparkContext predefined by spark-shell — confirm.
val rdd1 = sc.parallelize(List(("e", 5), ("c", 3), ("d", 4), ("c", 2), ("a", 1)))

// Keep only the pairs whose key lies in the inclusive range "b".."d";
// for this data that is the pairs keyed by "c" and "d".
val rdd2 = rdd1.filterByRange("b", "d")
rdd2.collect()
// -------------------------------------------------------------------------------------------
// -------------------------------------------------------------------------------------------
// flatMapValues
// Expected result: Array((a,1), (a,2), (b,3), (b,4))
// NOTE(review): assumes `sc` is the SparkContext predefined by spark-shell — confirm.
val rdd3 = sc.parallelize(List(("a", "1 2"), ("b", "3 4")))

// Split each value on spaces; every resulting piece is paired with the original key.
val rdd4 = rdd3.flatMapValues(_.split(" "))
rdd4.collect()
// -------------------------------------------------------------------------------------------
// -------------------------------------------------------------------------------------------
// foldByKey
// NOTE(review): assumes `sc` is the SparkContext predefined by spark-shell — confirm.
val rdd1 = sc.parallelize(List("dog", "wolf", "cat", "bear"), 2)

// Key every word by its length, then concatenate the words that share a key,
// starting from the empty string as the zero value.
val rdd2 = rdd1.map(x => (x.length, x))
val rdd3 = rdd2.foldByKey("")(_ + _)

// Word count expressed with foldByKey: emit (word, 1) and sum the ones per word.
val rdd = sc.textFile("hdfs://node-1.itcast.cn:9000/wc").flatMap(_.split(" ")).map((_, 1))
rdd.foldByKey(0)(_ + _)
// -------------------------------------------------------------------------------------------
// -------------------------------------------------------------------------------------------
// foreachPartition
// NOTE(review): assumes `sc` is the SparkContext predefined by spark-shell — confirm.
val rdd1 = sc.parallelize(List(1, 2, 3, 4, 5, 6, 7, 8, 9), 3)

// The function receives each partition as an iterator; here it prints the sum of
// every partition. `reduce` would throw on an empty partition, which does not
// occur with nine elements spread over three partitions.
rdd1.foreachPartition(x => println(x.reduce(_ + _)))
// -------------------------------------------------------------------------------------------
// -------------------------------------------------------------------------------------------
// keyBy: uses the result of the supplied function as the key of each element
// NOTE(review): assumes `sc` is the SparkContext predefined by spark-shell — confirm.
val rdd1 = sc.parallelize(List("dog", "salmon", "salmon", "rat", "elephant"), 3)

// Produces (length, word) pairs, e.g. (3,dog) and (6,salmon).
val rdd2 = rdd1.keyBy(_.length)
rdd2.collect()
// -------------------------------------------------------------------------------------------
// -------------------------------------------------------------------------------------------
// keys / values
// NOTE(review): assumes `sc` is the SparkContext predefined by spark-shell — confirm.
val rdd1 = sc.parallelize(List("dog", "tiger", "lion", "cat", "panther", "eagle"), 2)

// Build (length, word) pairs, then project out only the keys or only the values.
val rdd2 = rdd1.map(x => (x.length, x))
rdd2.keys.collect()
rdd2.values.collect()