Skip to content

Commit f59f715

Browse files
authored
feat: custom product versions for Hadoop, HBase, Phoenix, hbase-operator-tools, Druid, Hive and Spark (#1173)
* feat: support custom product versions
* chore: changelog
* chore: remove unnecessary variables
* fix: hbase snapshot export script
1 parent 14aece9 commit f59f715

36 files changed: +452 −355 lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ All notable changes to this project will be documented in this file.
8686
- testing-tools: Update base image ([#1165]).
8787
- trino: Enable custom versions ([#1168]).
8888
- opa: Enable custom versions ([#1170]).
89+
- use custom product versions for Hadoop, HBase, Phoenix, hbase-operator-tools, Druid, Hive and Spark ([#1173]).
8990

9091
### Fixed
9192

@@ -192,6 +193,7 @@ All notable changes to this project will be documented in this file.
192193
[#1165]: https://github.com/stackabletech/docker-images/pull/1165
193194
[#1168]: https://github.com/stackabletech/docker-images/pull/1168
194195
[#1170]: https://github.com/stackabletech/docker-images/pull/1170
196+
[#1173]: https://github.com/stackabletech/docker-images/pull/1173
195197
[#1179]: https://github.com/stackabletech/docker-images/pull/1179
196198
[#1180]: https://github.com/stackabletech/docker-images/pull/1180
197199

conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
druid = importlib.import_module("druid.versions")
1515
hadoop = importlib.import_module("hadoop.versions")
1616
hbase = importlib.import_module("hbase.versions")
17+
hbase_jars = importlib.import_module("hbase.hbase.versions")
1718
hbase_phoenix = importlib.import_module("hbase.phoenix.versions")
1819
hbase_opa_authorizer = importlib.import_module("hbase.hbase-opa-authorizer.versions")
1920
hbase_operator_tools = importlib.import_module("hbase.hbase-operator-tools.versions")
@@ -48,6 +49,7 @@
4849
{"name": "druid", "versions": druid.versions},
4950
{"name": "hadoop", "versions": hadoop.versions},
5051
{"name": "hbase", "versions": hbase.versions},
52+
{"name": "hbase/hbase", "versions": hbase_jars.versions},
5153
{"name": "hbase/phoenix", "versions": hbase_phoenix.versions},
5254
{"name": "hbase/hbase-opa-authorizer", "versions": hbase_opa_authorizer.versions},
5355
{"name": "hbase/hbase-operator-tools", "versions": hbase_operator_tools.versions},

druid/Dockerfile

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7
22
# check=error=true
33

4+
FROM stackable/image/hadoop AS hadoop-builder
5+
46
FROM stackable/image/java-devel AS druid-builder
57

68
ARG PRODUCT
9+
ARG RELEASE
710
ARG JACKSON_DATAFORMAT_XML
811
ARG STAX2_API
912
ARG WOODSTOX_CORE
1013
ARG AUTHORIZER
1114
ARG STACKABLE_USER_UID
15+
ARG HADOOP
1216

1317
# Setting this to anything other than "true" will keep the cache folders around (e.g. for Maven, NPM etc.)
1418
# This can be used to speed up builds when disk space is of no concern.
@@ -35,6 +39,7 @@ WORKDIR /stackable
3539
COPY --chown=${STACKABLE_USER_UID}:0 druid/stackable/patches/patchable.toml /stackable/src/druid/stackable/patches/patchable.toml
3640
COPY --chown=${STACKABLE_USER_UID}:0 druid/stackable/patches/${PRODUCT} /stackable/src/druid/stackable/patches/${PRODUCT}
3741

42+
COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs
3843
# Cache mounts are owned by root by default
3944
# We need to explicitly give the uid to use which is hardcoded to "1000" in stackable-base
4045
# The cache id has to include the product version that we are building because otherwise
@@ -54,24 +59,33 @@ RUN --mount=type=cache,id=maven-${PRODUCT},uid=${STACKABLE_USER_UID},target=/sta
5459
cd "$(cat /tmp/DRUID_SOURCE_DIR)" || exit 1
5560
rm /tmp/DRUID_SOURCE_DIR
5661

62+
ORIGINAL_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
63+
NEW_VERSION="${PRODUCT}-stackable${RELEASE}"
64+
65+
mvn versions:set -DnewVersion=$NEW_VERSION
66+
67+
# Make Maven aware of custom Stackable libraries
68+
cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository
69+
5770
# Create snapshot of the source code including custom patches
58-
tar -czf /stackable/druid-${PRODUCT}-src.tar.gz .
71+
tar -czf /stackable/druid-${NEW_VERSION}-src.tar.gz .
5972

6073
mvn \
6174
--batch-mode \
6275
--no-transfer-progress \
6376
clean install \
6477
-Pdist,stackable-bundle-contrib-exts \
65-
-Dmaven.test.skip `# Skip test compilation` \
78+
-Dhadoop.compile.version=${HADOOP}-stackable${RELEASE} \
6679
-DskipTests `# Skip test execution` \
6780
-Dcheckstyle.skip `# Skip checkstyle checks. We dont care if the code is properly formatted, it just wastes time` \
6881
-Dmaven.javadoc.skip=true `# Dont generate javadoc` \
6982
-Dmaven.gitcommitid.skip=true `# The gitcommitid plugin cannot work with git workspaces (ie: patchable)` \
7083
$(if [[ ${PRODUCT} != 30.* ]]; then echo --projects '!quidem-ut'; fi) `# This is just a maven module for tests. https://github.com/apache/druid/pull/16867 added https://raw.githubusercontent.com/kgyrtkirk/datasets/repo/ as a Maven repository, which fails to pull for us (Failed to execute goal on project druid-quidem-ut: Could not resolve dependencies for project org.apache.druid:druid-quidem-ut:jar:33.0.0: com.github.kgyrtkirk.datasets:kttm-nested:jar:0.1 was not found in https://build-repo.stackable.tech/repository/maven-public/). By disabling the maven module we dont pull in this weird dependency...`
7184

72-
mv distribution/target/apache-druid-${PRODUCT}-bin/apache-druid-${PRODUCT} /stackable/
73-
mv distribution/target/bom.json /stackable/apache-druid-${PRODUCT}/apache-druid-${PRODUCT}.cdx.json
74-
rm -rf /stackable/apache-druid-${PRODUCT}-src
85+
mv distribution/target/apache-druid-${NEW_VERSION}-bin/apache-druid-${NEW_VERSION} /stackable/
86+
sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" distribution/target/bom.json
87+
mv distribution/target/bom.json /stackable/apache-druid-${NEW_VERSION}/apache-druid-${NEW_VERSION}.cdx.json
88+
rm -rf /stackable/apache-druid-${NEW_VERSION}-src
7589

7690
# We're removing these to make the intermediate layer smaller
7791
# This can be necessary even though it's only a builder image because the GitHub Action Runners only have very limited space available
@@ -83,11 +97,11 @@ if [ "${DELETE_CACHES}" = "true" ] ; then
8397
rm -rf /stackable/.cache/*
8498
fi
8599

86-
# Do not remove the /stackable/apache-druid-${PRODUCT}/quickstart folder, it is needed for loading the Wikipedia
100+
# Do not remove the /stackable/apache-druid-${NEW_VERSION}/quickstart folder, it is needed for loading the Wikipedia
87101
# testdata in kuttl tests and the getting started guide.
88102

89103
# Install OPA authorizer extension.
90-
curl "https://repo.stackable.tech/repository/packages/druid/druid-opa-authorizer-${AUTHORIZER}.tar.gz" | tar -xzC /stackable/apache-druid-${PRODUCT}/extensions
104+
curl "https://repo.stackable.tech/repository/packages/druid/druid-opa-authorizer-${AUTHORIZER}.tar.gz" | tar -xzC /stackable/apache-druid-${NEW_VERSION}/extensions
91105

92106
# change groups
93107
chmod -R g=u /stackable
@@ -122,8 +136,8 @@ LABEL io.k8s.description="${DESCRIPTION}"
122136
LABEL io.k8s.display-name="${NAME}"
123137

124138

125-
COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/apache-druid-${PRODUCT} /stackable/apache-druid-${PRODUCT}
126-
COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/druid-${PRODUCT}-src.tar.gz /stackable
139+
COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/apache-druid-${PRODUCT}-stackable${RELEASE} /stackable/apache-druid-${PRODUCT}-stackable${RELEASE}
140+
COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/druid-${PRODUCT}-stackable${RELEASE}-src.tar.gz /stackable
127141

128142
COPY --chown=${STACKABLE_USER_UID}:0 druid/stackable/bin /stackable/bin
129143
COPY --chown=${STACKABLE_USER_UID}:0 druid/licenses /licenses
@@ -136,7 +150,7 @@ chown ${STACKABLE_USER_UID}:0 /stackable/package_manifest.txt
136150
chmod g=u /stackable/package_manifest.txt
137151
rm -rf /var/cache/yum
138152

139-
ln -sf /stackable/apache-druid-${PRODUCT} /stackable/druid
153+
ln -sf /stackable/apache-druid-${PRODUCT}-stackable${RELEASE} /stackable/druid
140154
chown -h ${STACKABLE_USER_UID}:0 stackable/druid
141155

142156
# Force to overwrite the existing 'run-druid'
@@ -145,7 +159,7 @@ chown -h ${STACKABLE_USER_UID}:0 /stackable/druid/bin/run-druid
145159

146160
# fix missing permissions
147161
chmod -R g=u /stackable/bin
148-
chmod g=u /stackable/apache-druid-${PRODUCT}
162+
chmod g=u /stackable/apache-druid-${PRODUCT}-stackable${RELEASE} /stackable/druid-${PRODUCT}-stackable${RELEASE}-src.tar.gz
149163
EOF
150164

151165
# ----------------------------------------

druid/stackable/patches/30.0.1/0010-Fix-CVE-2023-34455.patch

Lines changed: 0 additions & 43 deletions
This file was deleted.

druid/stackable/patches/31.0.1/0010-Fix-CVE-2023-34455.patch

Lines changed: 0 additions & 43 deletions
This file was deleted.

druid/stackable/patches/33.0.0/0010-Fix-CVE-2023-34455.patch

Lines changed: 0 additions & 43 deletions
This file was deleted.

druid/versions.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,23 @@
44
# https://druid.apache.org/docs/30.0.1/operations/java/
55
"java-base": "17",
66
"java-devel": "17",
7+
"hadoop": "3.3.6",
78
"authorizer": "0.7.0",
89
},
910
{
1011
"product": "31.0.1",
1112
# https://druid.apache.org/docs/31.0.1/operations/java/
1213
"java-base": "17",
1314
"java-devel": "17",
15+
"hadoop": "3.3.6",
1416
"authorizer": "0.7.0",
1517
},
1618
{
1719
"product": "33.0.0",
1820
# https://druid.apache.org/docs/33.0.0/operations/java/
1921
"java-base": "17",
2022
"java-devel": "17",
23+
"hadoop": "3.3.6",
2124
"authorizer": "0.7.0",
2225
},
2326
]

hadoop/Dockerfile

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
FROM stackable/image/java-devel AS hadoop-builder
55

66
ARG PRODUCT
7+
ARG RELEASE
78
ARG ASYNC_PROFILER
89
ARG JMX_EXPORTER
910
ARG PROTOBUF
@@ -66,6 +67,7 @@ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches/patchable.toml /bu
6667
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches/${PRODUCT} /build/src/hadoop/stackable/patches/${PRODUCT}
6768
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/fuse_dfs_wrapper /build
6869
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx
70+
USER ${STACKABLE_USER_UID}
6971
# Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI, so skip building this module
7072
# Build from source to enable FUSE module, and to apply custom patches.
7173
# Also skip building the yarn, mapreduce and minicluster modules: this will result in the modules being excluded but not all
@@ -74,27 +76,42 @@ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx
7476
RUN <<EOF
7577
cd "$(/stackable/patchable --images-repo-root=src checkout hadoop ${PRODUCT})"
7678

79+
ORIGINAL_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
80+
NEW_VERSION=${PRODUCT}-stackable${RELEASE}
81+
82+
mvn versions:set -DnewVersion=${NEW_VERSION}
83+
84+
# Since we skip building the hadoop-pipes module, we need to set the version to the original version so it can be pulled from Maven Central
85+
sed -e '/<artifactId>hadoop-pipes<\/artifactId>/,/<\/dependency>/ { s/<version>.*<\/version>/<version>'"$ORIGINAL_VERSION"'<\/version>/ }' -i hadoop-tools/hadoop-tools-dist/pom.xml
86+
7787
# Create snapshot of the source code including custom patches
78-
tar -czf /stackable/hadoop-${PRODUCT}-src.tar.gz .
88+
tar -czf /stackable/hadoop-${NEW_VERSION}-src.tar.gz .
7989

8090
mvn \
8191
--batch-mode \
8292
--no-transfer-progress \
83-
clean package \
93+
clean package install \
8494
-Pdist,native \
85-
-pl '!hadoop-tools/hadoop-pipes,!hadoop-yarn-project,!hadoop-mapreduce-project,!hadoop-minicluster' \
95+
-pl '!hadoop-tools/hadoop-pipes' \
96+
-Dhadoop.version=${NEW_VERSION} \
8697
-Drequire.fuse=true \
8798
-DskipTests \
8899
-Dmaven.javadoc.skip=true
89100

90-
cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT}
91-
mv hadoop-dist/target/bom.json /stackable/hadoop-${PRODUCT}/hadoop-${PRODUCT}.cdx.json
101+
mkdir -p /stackable/patched-libs/maven/org/apache
102+
cp -r /stackable/.m2/repository/org/apache/hadoop /stackable/patched-libs/maven/org/apache
103+
104+
cp -r hadoop-dist/target/hadoop-${NEW_VERSION} /stackable/hadoop-${NEW_VERSION}
105+
sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" hadoop-dist/target/bom.json
106+
mv hadoop-dist/target/bom.json /stackable/hadoop-${NEW_VERSION}/hadoop-${NEW_VERSION}.cdx.json
92107

93108
# HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves
94-
cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin
95-
rm -rf /build/hadoop-${PRODUCT}-src
109+
cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${NEW_VERSION}/bin
110+
111+
# Remove source code
112+
(cd .. && rm -r ${PRODUCT})
96113

97-
ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop
114+
ln -s /stackable/hadoop-${NEW_VERSION} /stackable/hadoop
98115

99116
mv /build/fuse_dfs_wrapper /stackable/hadoop/bin
100117

@@ -109,9 +126,9 @@ rm -rf /stackable/hadoop/share/hadoop/tools/sources/
109126
rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar
110127
rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar
111128
rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar
112-
find /stackable -name 'hadoop-minicluster-*.jar' -type f -delete
113-
find /stackable -name 'hadoop-client-minicluster-*.jar' -type f -delete
114-
find /stackable -name 'hadoop-*tests.jar' -type f -delete
129+
find /stackable/hadoop -name 'hadoop-minicluster-*.jar' -type f -delete
130+
find /stackable/hadoop -name 'hadoop-client-minicluster-*.jar' -type f -delete
131+
find /stackable/hadoop -name 'hadoop-*tests.jar' -type f -delete
115132
rm -rf /stackable/.m2
116133

117134
# Set correct groups; make sure only required artifacts for the final image are located in /stackable
@@ -187,7 +204,7 @@ LABEL \
187204
description="This image is deployed by the Stackable Operator for Apache Hadoop / HDFS."
188205

189206
COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable /stackable
190-
COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
207+
COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}-stackable${RELEASE}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
191208
COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}-src.tar.gz /stackable
192209

193210
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/licenses /licenses

0 commit comments

Comments (0)