How to Use Spark SQL

This article walks through a minimal Spark SQL example in Java: adding the Maven dependencies, building a DataFrame from a JavaRDD of JavaBeans, and querying it with SQL. Follow along and try it yourself.

First, add the Spark Core and Spark SQL dependencies to pom.xml:
<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.10</artifactId>
        <version>2.1.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.10</artifactId>
        <version>2.1.0</version>
    </dependency>
</dependencies>
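
Note that the _2.10 suffix on both artifact IDs is the Scala version the Spark binaries were built against; the suffix and the version number should be identical across the two dependencies.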

Java:

import java.io.Serializable;
import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;

public class SparkSqlTest {

    // JavaBean used to infer the DataFrame schema: it must be serializable
    // and expose a getter/setter pair for every column.
    public static class Person implements Serializable {
        private static final long serialVersionUID = -6259413972682177507L;
        private String name;
        private int age;

        public Person(String name, int age) {
            this.name = name;
            this.age = age;
        }

        public String toString() {
            return name + ": " + age;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public int getAge() {
            return age;
        }

        public void setAge(int age) {
            this.age = age;
        }
    }

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("Test").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // getOrCreate() reuses the SparkContext created above
        SparkSession spark = SparkSession.builder().appName("Test").getOrCreate();

        // Parse "name,age" strings into Person beans
        JavaRDD<String> input = sc.parallelize(Arrays.asList("abc,1", "test,2"));
        JavaRDD<Person> persons = input.map(s -> s.split(","))
                .map(s -> new Person(s[0], Integer.parseInt(s[1])));
        // [abc: 1, test: 2]
        System.out.println(persons.collect());

        // Infer the schema from the Person bean and build a DataFrame
        Dataset<Row> df = spark.createDataFrame(persons, Person.class);

        /*
        +---+----+
        |age|name|
        +---+----+
        |  1| abc|
        |  2|test|
        +---+----+
        */
        df.show();

        /*
        root
         |-- age: integer (nullable = false)
         |-- name: string (nullable = true)
        */
        df.printSchema();

        // Register the DataFrame as a temporary table so it can be queried with SQL
        SQLContext sql = new SQLContext(spark);
        sql.registerDataFrameAsTable(df, "person");

        /*
        +---+----+
        |age|name|
        +---+----+
        |  2|test|
        +---+----+
        */
        sql.sql("SELECT * FROM person WHERE age>1").show();

        sc.close();
    }
}
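
The SQLContext route shown above still works in Spark 2.x, but the SparkSession covers the same ground on its own. As a minimal sketch of the equivalent registration and query, reusing the spark and df variables from the listing:

    // Spark 2.x style: register the DataFrame as a temporary view on the
    // session itself; no separate SQLContext is needed.
    df.createOrReplaceTempView("person");
    spark.sql("SELECT * FROM person WHERE age > 1").show();

Since the master is set to local, either variant runs directly from an IDE without a cluster.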

That concludes this walkthrough of how to use Spark SQL. Pairing the theory with hands-on practice is the best way to learn, so give the example a try.
