0.0.2 zeppelin 0.10.1 hadoop 3.3.0 spark 3.2.2 #8

Open · wants to merge 4 commits into master
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
* text=auto eol=lf
7 changes: 4 additions & 3 deletions Makefile
@@ -8,12 +8,13 @@ down:
docker network rm spark-net

bash:
docker exec -it dockerzeppelin_zeppelin_1 bash
docker run --rm -it bde2020/zeppelin:0.10.1 bash

run:
docker build -t zeppelin ./zeppelin/.
docker run -it --rm --net spark-net -p 80:8080 -v $(shell pwd)/notebook:/opt/zeppelin/notebook -v $(shell pwd)/zeppelin-0.7.2-bin-all:/opt/zeppelin zeppelin /bin/bash
#docker run -it --rm --net spark-net -p 80:8080 -v $(shell pwd)/notebook:/opt/zeppelin/notebook -v $(shell pwd)/zeppelin-0.10.1-bin-all:/opt/zeppelin zeppelin /bin/bash
#docker run -it --rm --net spark-net -p 80:8080 -v $(shell pwd)/notebook:/opt/zeppelin/notebook zeppelin /opt/zeppelin/bin/zeppelin.sh
docker run -it --rm --net spark-net -p 8091:8080 -e ZEPPELIN_ADDR=0.0.0.0 -v $(shell pwd)/notebook:/opt/zeppelin/notebook zeppelin /bin/bash

build:
docker build -t earthquakesan/zeppelin:0.7.2 ./zeppelin/.
docker build -t bde2020/zeppelin:0.10.1 ./zeppelin/.
1 change: 1 addition & 0 deletions data/namenode/in_use.lock
@@ -0,0 +1 @@
84@edd0807d5479e
41 changes: 26 additions & 15 deletions docker-compose.yml
@@ -1,26 +1,26 @@
version: "2.1"
version: "3"

services:
namenode:
image: bde2020/hadoop-namenode:1.1.0-hadoop2.8-java8
image: bde2020/hadoop-namenode:2.0.0-hadoop3.3.3-java8
container_name: namenode
volumes:
- ./data/namenode:/hadoop/dfs/name
environment:
- CLUSTER_NAME=test
- CORE_CONF_fs_defaultFS=hdfs://namenode:8020
- CORE_CONF_fs_defaultFS=hdfs://namenode:9000
healthcheck:
interval: 5s
retries: 100
networks:
- spark-net
datanode:
image: bde2020/hadoop-datanode:1.1.0-hadoop2.8-java8
image: bde2020/hadoop-datanode:2.0.0-hadoop3.3.3-java8
container_name: datanode
volumes:
- ./data/datanode:/hadoop/dfs/data
environment:
- CORE_CONF_fs_defaultFS=hdfs://namenode:8020
- CORE_CONF_fs_defaultFS=hdfs://namenode:9000
depends_on:
namenode:
condition: service_healthy
@@ -30,13 +30,15 @@ services:
networks:
- spark-net
spark-master:
image: bde2020/spark-master:2.1.0-hadoop2.8-hive-java8
image: bde2020/spark-master:3.3.0-hadoop3.3
container_name: spark-master
ports:
- "8080:8080"
- "7077:7077"
environment:
- CORE_CONF_fs_defaultFS=hdfs://namenode:8020
- CORE_CONF_fs_defaultFS=hdfs://namenode:9000
environment:
SERVICE_PRECONDITION: "namenode:9000 namenode:9870 datanode:9864"
depends_on:
namenode:
condition: service_healthy
@@ -48,14 +50,14 @@
networks:
- spark-net
spark-worker:
image: bde2020/spark-worker:2.1.0-hadoop2.8-hive-java8
image: bde2020/spark-worker:3.3.0-hadoop3.3
environment:
- "SPARK_MASTER=spark://spark-master:7077"
environment:
- CORE_CONF_fs_defaultFS=hdfs://namenode:8020
- CORE_CONF_fs_defaultFS=hdfs://namenode:9000
depends_on:
spark-master:
condition: service_healthy
- spark-master
# condition: service_healthy
healthcheck:
interval: 5s
retries: 100
@@ -64,19 +66,28 @@
zeppelin:
build: ./zeppelin
ports:
- 80:8080
- 8090:8080
volumes:
- ./notebook:/opt/zeppelin/notebook
environment:
CORE_CONF_fs_defaultFS: "hdfs://namenode:8020"
CORE_CONF_fs_defaultFS: "hdfs://namenode:9000"
SPARK_MASTER: "spark://spark-master:7077"
SPARK_HOME: "/spark"
MASTER: "spark://spark-master:7077"
ZEPPELIN_ADDR: "0.0.0.0"

#SPARK_SUBMIT_OPTIONS: "--jars /opt/sansa-examples/jars/sansa-examples-spark-2016-12.jar"
depends_on:
spark-master:
condition: service_healthy

# condition: service_healthy

# condition: service_healthy
namenode:
condition: service_healthy
depends_on:
- spark-master
- spark-worker

networks:
- spark-net

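For orientation, a minimal sketch (not part of this PR; the object and app names are illustrative) of a Spark job on the spark-net network consuming the endpoints this compose file wires up — the master URL and fs.defaultFS value are copied from the environment entries above:

    // Hypothetical smoke test against the compose services defined above.
    import org.apache.spark.sql.SparkSession

    object ComposeSmokeTest {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .appName("compose-smoke-test")
          .master("spark://spark-master:7077")                          // SPARK_MASTER above
          .config("spark.hadoop.fs.defaultFS", "hdfs://namenode:9000")  // CORE_CONF_fs_defaultFS after this PR
          .getOrCreate()
        println(spark.range(100).count()) // trivial action: proves the master and FS config resolve
        spark.stop()
      }
    }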
57 changes: 23 additions & 34 deletions notebook/2CF34ERK6/note.json
@@ -1,47 +1,50 @@
{
"paragraphs": [
{
"text": "import scala.math.random\n\nimport org.apache.spark.sql.SparkSession\n\nval slices \u003d 2\nval n \u003d math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow\nval count \u003d spark.sparkContext.parallelize(1 until n, slices).map { i \u003d\u003e\n val x \u003d random * 2 - 1\n val y \u003d random * 2 - 1\n if (x*x + y*y \u003c\u003d 1) 1 else 0\n}.reduce(_ + _)\nprintln(\"Pi is roughly \" + 4.0 * count / (n - 1))",
"text": "%pyspark\nimport scala.math.random\n\nimport org.apache.spark.sql.SparkSession\n\nval slices \u003d 2\nval n \u003d math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow\nval count \u003d spark.sparkContext.parallelize(1 until n, slices).map { i \u003d\u003e\n val x \u003d random * 2 - 1\n val y \u003d random * 2 - 1\n if (x*x + y*y \u003c\u003d 1) 1 else 0\n}.reduce(_ + _)\nprintln(\"Pi is roughly \" + 4.0 * count / (n - 1))",
"user": "anonymous",
"dateUpdated": "May 11, 2017 8:47:28 AM",
"dateUpdated": "2022-10-08 03:16:05.764",
"config": {
"colWidth": 12.0,
"enabled": true,
"results": {},
"editorSetting": {
"language": "text",
"editOnDblClick": false
"language": "python",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
},
"editorMode": "ace/mode/text",
"editorMode": "ace/mode/python",
"editorHide": false,
"tableHide": true
"tableHide": true,
"fontSize": 9.0
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"code": "ERROR",
"msg": [
{
"type": "TEXT",
"data": "\nimport scala.math.random\n\nimport org.apache.spark.sql.SparkSession\n\nslices: Int \u003d 2\n\nn: Int \u003d 200000\n\ncount: Int \u003d 156835\nPi is roughly 3.136715683578418\n"
"data": "java.lang.NoSuchMethodError: scala.tools.nsc.Settings.usejavacp()Lscala/tools/nsc/settings/AbsSettings$AbsSetting;\n\tat org.apache.zeppelin.spark.SparkScala211Interpreter.open(SparkScala211Interpreter.scala:70)\n\tat org.apache.zeppelin.spark.NewSparkInterpreter.open(NewSparkInterpreter.java:102)\n\tat org.apache.zeppelin.spark.SparkInterpreter.open(SparkInterpreter.java:62)\n\tat org.apache.zeppelin.interpreter.LazyOpenInterpreter.open(LazyOpenInterpreter.java:69)\n\tat org.apache.zeppelin.spark.PySparkInterpreter.getSparkInterpreter(PySparkInterpreter.java:664)\n\tat org.apache.zeppelin.spark.PySparkInterpreter.createGatewayServerAndStartScript(PySparkInterpreter.java:260)\n\tat org.apache.zeppelin.spark.PySparkInterpreter.open(PySparkInterpreter.java:194)\n\tat org.apache.zeppelin.interpreter.LazyOpenInterpreter.open(LazyOpenInterpreter.java:69)\n\tat org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:616)\n\tat org.apache.zeppelin.scheduler.Job.run(Job.java:188)\n\tat org.apache.zeppelin.scheduler.FIFOScheduler$1.run(FIFOScheduler.java:140)\n\tat java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat java.lang.Thread.run(Thread.java:748)\n"
}
]
},
"apps": [],
"jobName": "paragraph_1494446151737_1359394047",
"id": "20170510-195551_400594820",
"dateCreated": "May 10, 2017 7:55:51 PM",
"dateStarted": "May 10, 2017 8:40:36 PM",
"dateFinished": "May 10, 2017 8:41:04 PM",
"status": "FINISHED",
"dateCreated": "2017-05-10 19:55:51.000",
"dateStarted": "2022-10-08 03:16:05.784",
"dateFinished": "2022-10-08 03:16:06.245",
"status": "ERROR",
"progressUpdateIntervalMs": 500
},
{
"text": "",
"user": "anonymous",
"dateUpdated": "May 11, 2017 8:49:13 AM",
"dateUpdated": "2017-05-11 08:49:13.000",
"config": {
"colWidth": 12.0,
"enabled": true,
@@ -60,34 +63,20 @@
"apps": [],
"jobName": "paragraph_1494446793336_527866307",
"id": "20170510-200633_2002902352",
"dateCreated": "May 10, 2017 8:06:33 PM",
"dateCreated": "2017-05-10 20:06:33.000",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
}
],
"name": "SparkPi",
"id": "2CF34ERK6",
"noteParams": {},
"noteForms": {},
"angularObjects": {
"2CHD267MK:shared_process": [],
"2CFCYW8ZZ:shared_process": [],
"2CGGU1AUC:shared_process": [],
"2CHDUK5RT:shared_process": [],
"2CFPWN8ZX:shared_process": [],
"2CJT3A9WM:shared_process": [],
"2CFTT4BX1:shared_process": [],
"2CHPV1WNR:shared_process": [],
"2CH8NUNKD:shared_process": [],
"2CG24PUFX:shared_process": [],
"2CJHA5F79:shared_process": [],
"2CFD8HYGS:shared_process": [],
"2CH5TSP4J:shared_process": [],
"2CF3WY7WY:shared_process": [],
"2CFYWKGJK:shared_process": [],
"2CJFHW9TZ:shared_process": [],
"2CGEFHREK:shared_process": [],
"2CG4M1FG9:shared_process": [],
"2CH3SASQ1:shared_process": []
"spark:shared_process": []
},
"config": {
"isZeppelinNotebookCronEnable": false
},
"config": {},
"info": {}
}
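The paragraph text above is stored as an escaped JSON string; unescaped, it is the classic Monte Carlo SparkPi estimate in Scala (note the PR prepends %pyspark to a Scala body, consistent with the ERROR result now recorded). A readable sketch, assuming the `spark` SparkSession that Zeppelin's Spark interpreter injects:

    // Unescaped Scala body of the SparkPi paragraph above.
    import scala.math.random
    import org.apache.spark.sql.SparkSession // present in the original paragraph, unused here

    val slices = 2
    val n = math.min(100000L * slices, Int.MaxValue).toInt // avoid Int overflow
    val count = spark.sparkContext.parallelize(1 until n, slices).map { i =>
      val x = random * 2 - 1 // random point in [-1, 1) x [-1, 1)
      val y = random * 2 - 1
      if (x * x + y * y <= 1) 1 else 0 // 1 if the point lands inside the unit circle
    }.reduce(_ + _)
    println("Pi is roughly " + 4.0 * count / (n - 1)) // inside/total ratio times 4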
107 changes: 107 additions & 0 deletions notebook/2HHJNC9Z8/note.json
@@ -0,0 +1,107 @@
{
"paragraphs": [
{
"text": "val x \u003d sc.textFile(\"/lenta_articles/*.txt\", 4)",
"user": "anonymous",
"dateUpdated": "2022-10-08 03:24:24.338",
"config": {
"colWidth": 12.0,
"fontSize": 9.0,
"enabled": true,
"results": {},
"editorSetting": {
"language": "scala",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
},
"editorMode": "ace/mode/scala"
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "ERROR",
"msg": [
{
"type": "TEXT",
"data": "java.lang.NoSuchMethodError: scala.tools.nsc.Settings.usejavacp()Lscala/tools/nsc/settings/AbsSettings$AbsSetting;\n\tat org.apache.zeppelin.spark.SparkScala211Interpreter.open(SparkScala211Interpreter.scala:70)\n\tat org.apache.zeppelin.spark.NewSparkInterpreter.open(NewSparkInterpreter.java:102)\n\tat org.apache.zeppelin.spark.SparkInterpreter.open(SparkInterpreter.java:62)\n\tat org.apache.zeppelin.interpreter.LazyOpenInterpreter.open(LazyOpenInterpreter.java:69)\n\tat org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:616)\n\tat org.apache.zeppelin.scheduler.Job.run(Job.java:188)\n\tat org.apache.zeppelin.scheduler.FIFOScheduler$1.run(FIFOScheduler.java:140)\n\tat java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat java.lang.Thread.run(Thread.java:748)\n"
}
]
},
"apps": [],
"jobName": "paragraph_1665196689001_1295208312",
"id": "20221008-023809_2067929533",
"dateCreated": "2022-10-08 02:38:09.001",
"dateStarted": "2022-10-08 03:24:24.353",
"dateFinished": "2022-10-08 03:24:24.373",
"status": "ERROR",
"progressUpdateIntervalMs": 500
},
{
"text": "x.take(3).foreach(println)",
"user": "anonymous",
"dateUpdated": "2022-10-08 03:17:30.596",
"config": {
"colWidth": 12.0,
"fontSize": 9.0,
"enabled": true,
"results": {},
"editorSetting": {
"language": "scala",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
},
"editorMode": "ace/mode/scala"
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "ERROR",
"msg": [
{
"type": "TEXT",
"data": "java.lang.NoSuchMethodError: scala.tools.nsc.Settings.usejavacp()Lscala/tools/nsc/settings/AbsSettings$AbsSetting;\n\tat org.apache.zeppelin.spark.SparkScala211Interpreter.open(SparkScala211Interpreter.scala:70)\n\tat org.apache.zeppelin.spark.NewSparkInterpreter.open(NewSparkInterpreter.java:102)\n\tat org.apache.zeppelin.spark.SparkInterpreter.open(SparkInterpreter.java:62)\n\tat org.apache.zeppelin.interpreter.LazyOpenInterpreter.open(LazyOpenInterpreter.java:69)\n\tat org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:616)\n\tat org.apache.zeppelin.scheduler.Job.run(Job.java:188)\n\tat org.apache.zeppelin.scheduler.FIFOScheduler$1.run(FIFOScheduler.java:140)\n\tat java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat java.lang.Thread.run(Thread.java:748)\n"
}
]
},
"apps": [],
"jobName": "paragraph_1665197304739_-948041871",
"id": "20221008-024824_721918440",
"dateCreated": "2022-10-08 02:48:24.739",
"dateStarted": "2022-10-08 03:17:30.614",
"dateFinished": "2022-10-08 03:17:30.633",
"status": "ERROR",
"progressUpdateIntervalMs": 500
},
{
"user": "anonymous",
"config": {},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"jobName": "paragraph_1665197328481_-57736739",
"id": "20221008-024848_945498816",
"dateCreated": "2022-10-08 02:48:48.481",
"status": "READY",
"progressUpdateIntervalMs": 500
}
],
"name": "Untitled Note 1",
"id": "2HHJNC9Z8",
"noteParams": {},
"noteForms": {},
"angularObjects": {
"spark:shared_process": []
},
"config": {
"isZeppelinNotebookCronEnable": false
},
"info": {}
}
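Both paragraphs in this note fail with the same java.lang.NoSuchMethodError raised from SparkScala211Interpreter, the usual symptom of a Scala binary-version mismatch between the Zeppelin Spark interpreter build and the Spark distribution it launches. What the note itself attempts, as a combined sketch assuming the Zeppelin-provided SparkContext `sc` and that /lenta_articles/*.txt exists on the default filesystem (hdfs://namenode:9000 after this PR):

    // The two paragraph bodies above, joined; `sc` comes from the interpreter.
    val x = sc.textFile("/lenta_articles/*.txt", 4) // 4 = minimum number of partitions
    x.take(3).foreach(println)                      // print the first three lines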