-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathDockerfile
More file actions
86 lines (62 loc) · 2.63 KB
/
Dockerfile
File metadata and controls
86 lines (62 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Base image for every layer below.
FROM ubuntu:14.04

# Build metadata. Each value is supplied with --build-arg and falls back to
# "unspecified" when omitted; all three are recorded as image labels.
# NOTE(review): build args declared ahead of RUN steps become part of those
# steps' cache key, so per-commit values here force the later RUN layers to
# rebuild -- consider declaring them at the end of the file instead.
ARG GIT_COMMIT=unspecified
ARG COMMIT_DATE=unspecified
ARG VERSION=unspecified
LABEL git_commit="$GIT_COMMIT" \
      commit_date="$COMMIT_DATE" \
      version="$VERSION"
####################
# JAVA
####################

# Location of the OpenJDK 7 installation provided by the package below.
ENV JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64

# Install the JDK. The package index is refreshed, used and deleted within a
# single layer so no stale or leftover apt lists end up in the image, and
# recommended-only packages are skipped to keep the layer small.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        openjdk-7-jdk && \
    rm -rf /var/lib/apt/lists/*
####################
# HADOOP
####################

# Release to install and its installation directory. These are set in their
# own instruction so the ENV instruction that follows can reference them.
ENV HADOOP_VERSION=2.8.0 \
    HADOOP_HOME=/usr/local/hadoop

# Point the JVM at Hadoop's native libraries and put the Hadoop client and
# daemon scripts on PATH.
ENV HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native" \
    PATH="$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin"

# Download, unpack and install Hadoop in a single layer so that neither the
# tarball, wget, nor the apt lists remain in the image. Explicit paths are
# used throughout so the step does not depend on the current working directory.
# NOTE(review): the tarball is fetched over plain HTTP and is not checked
# against a published checksum or signature -- consider verifying it.
# NOTE(review): libzip2 is the zip-archive library; if bzip2 codec support was
# the intent, confirm this is the right package.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        libsnappy1 \
        libssl-dev \
        libzip2 \
        wget && \
    wget -O /tmp/hadoop.tar.gz \
        "http://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" && \
    apt-get remove -y wget && \
    rm -rf /var/lib/apt/lists/* && \
    tar -xzf /tmp/hadoop.tar.gz -C /usr/local && \
    rm /tmp/hadoop.tar.gz && \
    mv "/usr/local/hadoop-$HADOOP_VERSION" "$HADOOP_HOME" && \
    mkdir -p "$HADOOP_HOME/logs"
# Replace the configuration shipped with Hadoop by the files from ./conf.
COPY conf ${HADOOP_HOME}/etc/hadoop/

# Create the HDFS storage directories and format the NameNode at build time.
# Presumably the configuration copied above points HDFS at these paths --
# verify against conf/.
RUN mkdir -p \
        /data/dfs/data \
        /data/dfs/name \
        /data/dfs/namesecondary && \
    hdfs namenode -format

# Declared only after the directories are populated: changes made to a volume
# path by build steps that come after VOLUME are discarded.
VOLUME ["/data"]
# Helper script for starting YARN.
# COPY is used instead of ADD because this is a plain local file; ADD's
# archive-extraction and URL-fetching behaviour is not wanted here.
COPY start-yarn.sh /usr/local/bin/start-yarn.sh
####################
# PORTS
####################
#
# http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.0/bk_HDP_Reference_Guide/content/reference_chap2.html
# http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/cdh_ig_ports_cdh5.html
# http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml
# http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml
# HDFS: NameNode (NN):
# 8020 = fs.defaultFS (IPC / File system metadata operations)
# (9000 is a frequently used alternative; it is the port this image exposes)
# 8022 = dfs.namenode.servicerpc-address (optional port used by HDFS daemons to avoid sharing RPC port)
# 50070 = dfs.namenode.http-address (HTTP / NN Web UI)
# 50470 = dfs.namenode.https-address (HTTPS / Secure UI)
# HDFS: DataNode (DN):
# 50010 = dfs.datanode.address (Data transfer)
# 50020 = dfs.datanode.ipc.address (IPC / metadata operations)
# 50075 = dfs.datanode.http.address (HTTP / DN Web UI)
# 50475 = dfs.datanode.https.address (HTTPS / Secure UI)
# HDFS: Secondary NameNode (SNN)
# 50090 = dfs.namenode.secondary.http-address (HTTP / Checkpoint for NameNode metadata; formerly dfs.secondary.http.address)
# Ports this image documents as exposed (EXPOSE does not publish them):
# 9000 (commonly the NameNode IPC port -- assumes fs.defaultFS in conf/ uses
# it), 50070 (NameNode web UI), 50010/50020/50075 (DataNode data transfer,
# IPC and web UI) and 50090 (Secondary NameNode HTTP).
EXPOSE 9000 \
       50070 \
       50010 50020 50075 \
       50090

# Default command: the bare `hdfs` launcher. Override the command at
# `docker run` to start a specific daemon or tool.
CMD ["hdfs"]