Skip to content

feat: custom product versions for Hadoop, HBase, Phoenix, hbase-operator-tools, Druid, Hive and Spark #1173

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ All notable changes to this project will be documented in this file.
- testing-tools: Update base image ([#1165]).
- trino: Enable custom versions ([#1168]).
- opa: Enable custom versions ([#1170]).
- Use custom product versions for Hadoop, HBase, Phoenix, hbase-operator-tools, Druid, Hive and Spark ([#1173]).

### Fixed

Expand Down Expand Up @@ -192,6 +193,7 @@ All notable changes to this project will be documented in this file.
[#1165]: https://github.com/stackabletech/docker-images/pull/1165
[#1168]: https://github.com/stackabletech/docker-images/pull/1168
[#1170]: https://github.com/stackabletech/docker-images/pull/1170
[#1173]: https://github.com/stackabletech/docker-images/pull/1173
[#1179]: https://github.com/stackabletech/docker-images/pull/1179
[#1180]: https://github.com/stackabletech/docker-images/pull/1180

Expand Down
2 changes: 2 additions & 0 deletions conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
druid = importlib.import_module("druid.versions")
hadoop = importlib.import_module("hadoop.versions")
hbase = importlib.import_module("hbase.versions")
hbase_jars = importlib.import_module("hbase.hbase.versions")
hbase_phoenix = importlib.import_module("hbase.phoenix.versions")
hbase_opa_authorizer = importlib.import_module("hbase.hbase-opa-authorizer.versions")
hbase_operator_tools = importlib.import_module("hbase.hbase-operator-tools.versions")
Expand Down Expand Up @@ -48,6 +49,7 @@
{"name": "druid", "versions": druid.versions},
{"name": "hadoop", "versions": hadoop.versions},
{"name": "hbase", "versions": hbase.versions},
{"name": "hbase/hbase", "versions": hbase_jars.versions},
{"name": "hbase/phoenix", "versions": hbase_phoenix.versions},
{"name": "hbase/hbase-opa-authorizer", "versions": hbase_opa_authorizer.versions},
{"name": "hbase/hbase-operator-tools", "versions": hbase_operator_tools.versions},
Expand Down
36 changes: 25 additions & 11 deletions druid/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7
# check=error=true

FROM stackable/image/hadoop AS hadoop-builder

FROM stackable/image/java-devel AS druid-builder

ARG PRODUCT
ARG RELEASE
ARG JACKSON_DATAFORMAT_XML
ARG STAX2_API
ARG WOODSTOX_CORE
ARG AUTHORIZER
ARG STACKABLE_USER_UID
ARG HADOOP

# Setting this to anything other than "true" will keep the cache folders around (e.g. for Maven, NPM etc.)
# This can be used to speed up builds when disk space is of no concern.
Expand All @@ -35,6 +39,7 @@ WORKDIR /stackable
COPY --chown=${STACKABLE_USER_UID}:0 druid/stackable/patches/patchable.toml /stackable/src/druid/stackable/patches/patchable.toml
COPY --chown=${STACKABLE_USER_UID}:0 druid/stackable/patches/${PRODUCT} /stackable/src/druid/stackable/patches/${PRODUCT}

COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs
# Cache mounts are owned by root by default
# We need to explicitly give the uid to use which is hardcoded to "1000" in stackable-base
# The cache id has to include the product version that we are building because otherwise
Expand All @@ -54,24 +59,33 @@ RUN --mount=type=cache,id=maven-${PRODUCT},uid=${STACKABLE_USER_UID},target=/sta
cd "$(cat /tmp/DRUID_SOURCE_DIR)" || exit 1
rm /tmp/DRUID_SOURCE_DIR

ORIGINAL_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
NEW_VERSION="${PRODUCT}-stackable${RELEASE}"

mvn versions:set -DnewVersion=$NEW_VERSION

# Make Maven aware of custom Stackable libraries
cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository

# Create snapshot of the source code including custom patches
tar -czf /stackable/druid-${PRODUCT}-src.tar.gz .
tar -czf /stackable/druid-${NEW_VERSION}-src.tar.gz .

mvn \
--batch-mode \
--no-transfer-progress \
clean install \
-Pdist,stackable-bundle-contrib-exts \
-Dmaven.test.skip `# Skip test compilation` \
-Dhadoop.compile.version=${HADOOP}-stackable${RELEASE} \
-DskipTests `# Skip test execution` \
-Dcheckstyle.skip `# Skip checkstyle checks. We dont care if the code is properly formatted, it just wastes time` \
-Dmaven.javadoc.skip=true `# Dont generate javadoc` \
-Dmaven.gitcommitid.skip=true `# The gitcommitid plugin cannot work with git workspaces (ie: patchable)` \
$(if [[ ${PRODUCT} != 30.* ]]; then echo --projects '!quidem-ut'; fi) `# This is just a maven module for tests. https://github.com/apache/druid/pull/16867 added https://raw.githubusercontent.com/kgyrtkirk/datasets/repo/ as a Maven repository, which fails to pull for us (Failed to execute goal on project druid-quidem-ut: Could not resolve dependencies for project org.apache.druid:druid-quidem-ut:jar:33.0.0: com.github.kgyrtkirk.datasets:kttm-nested:jar:0.1 was not found in https://build-repo.stackable.tech/repository/maven-public/). By disabling the maven module we dont pull in this weird dependency...`

mv distribution/target/apache-druid-${PRODUCT}-bin/apache-druid-${PRODUCT} /stackable/
mv distribution/target/bom.json /stackable/apache-druid-${PRODUCT}/apache-druid-${PRODUCT}.cdx.json
rm -rf /stackable/apache-druid-${PRODUCT}-src
mv distribution/target/apache-druid-${NEW_VERSION}-bin/apache-druid-${NEW_VERSION} /stackable/
sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" distribution/target/bom.json
mv distribution/target/bom.json /stackable/apache-druid-${NEW_VERSION}/apache-druid-${NEW_VERSION}.cdx.json
rm -rf /stackable/apache-druid-${NEW_VERSION}-src

# We're removing these to make the intermediate layer smaller
# This can be necessary even though it's only a builder image because the GitHub Action Runners only have very limited space available
Expand All @@ -83,11 +97,11 @@ if [ "${DELETE_CACHES}" = "true" ] ; then
rm -rf /stackable/.cache/*
fi

# Do not remove the /stackable/apache-druid-${PRODUCT}/quickstart folder, it is needed for loading the Wikipedia
# Do not remove the /stackable/apache-druid-${NEW_VERSION}/quickstart folder, it is needed for loading the Wikipedia
# testdata in kuttl tests and the getting started guide.

# Install OPA authorizer extension.
curl "https://repo.stackable.tech/repository/packages/druid/druid-opa-authorizer-${AUTHORIZER}.tar.gz" | tar -xzC /stackable/apache-druid-${PRODUCT}/extensions
curl "https://repo.stackable.tech/repository/packages/druid/druid-opa-authorizer-${AUTHORIZER}.tar.gz" | tar -xzC /stackable/apache-druid-${NEW_VERSION}/extensions

# change groups
chmod -R g=u /stackable
Expand Down Expand Up @@ -122,8 +136,8 @@ LABEL io.k8s.description="${DESCRIPTION}"
LABEL io.k8s.display-name="${NAME}"


COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/apache-druid-${PRODUCT} /stackable/apache-druid-${PRODUCT}
COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/druid-${PRODUCT}-src.tar.gz /stackable
COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/apache-druid-${PRODUCT}-stackable${RELEASE} /stackable/apache-druid-${PRODUCT}-stackable${RELEASE}
COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/druid-${PRODUCT}-stackable${RELEASE}-src.tar.gz /stackable

COPY --chown=${STACKABLE_USER_UID}:0 druid/stackable/bin /stackable/bin
COPY --chown=${STACKABLE_USER_UID}:0 druid/licenses /licenses
Expand All @@ -136,7 +150,7 @@ chown ${STACKABLE_USER_UID}:0 /stackable/package_manifest.txt
chmod g=u /stackable/package_manifest.txt
rm -rf /var/cache/yum

ln -sf /stackable/apache-druid-${PRODUCT} /stackable/druid
ln -sf /stackable/apache-druid-${PRODUCT}-stackable${RELEASE} /stackable/druid
chown -h ${STACKABLE_USER_UID}:0 stackable/druid

# Force to overwrite the existing 'run-druid'
Expand All @@ -145,7 +159,7 @@ chown -h ${STACKABLE_USER_UID}:0 /stackable/druid/bin/run-druid

# fix missing permissions
chmod -R g=u /stackable/bin
chmod g=u /stackable/apache-druid-${PRODUCT}
chmod g=u /stackable/apache-druid-${PRODUCT}-stackable${RELEASE} /stackable/druid-${PRODUCT}-stackable${RELEASE}-src.tar.gz
EOF

# ----------------------------------------
Expand Down
43 changes: 0 additions & 43 deletions druid/stackable/patches/30.0.1/0010-Fix-CVE-2023-34455.patch

This file was deleted.

43 changes: 0 additions & 43 deletions druid/stackable/patches/31.0.1/0010-Fix-CVE-2023-34455.patch

This file was deleted.

43 changes: 0 additions & 43 deletions druid/stackable/patches/33.0.0/0010-Fix-CVE-2023-34455.patch

This file was deleted.

3 changes: 3 additions & 0 deletions druid/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,23 @@
# https://druid.apache.org/docs/30.0.1/operations/java/
"java-base": "17",
"java-devel": "17",
"hadoop": "3.3.6",
"authorizer": "0.7.0",
},
{
"product": "31.0.1",
# https://druid.apache.org/docs/31.0.1/operations/java/
"java-base": "17",
"java-devel": "17",
"hadoop": "3.3.6",
"authorizer": "0.7.0",
},
{
"product": "33.0.0",
# https://druid.apache.org/docs/33.0.0/operations/java/
"java-base": "17",
"java-devel": "17",
"hadoop": "3.3.6",
"authorizer": "0.7.0",
},
]
41 changes: 29 additions & 12 deletions hadoop/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
FROM stackable/image/java-devel AS hadoop-builder

ARG PRODUCT
ARG RELEASE
ARG ASYNC_PROFILER
ARG JMX_EXPORTER
ARG PROTOBUF
Expand Down Expand Up @@ -66,6 +67,7 @@ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches/patchable.toml /bu
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches/${PRODUCT} /build/src/hadoop/stackable/patches/${PRODUCT}
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/fuse_dfs_wrapper /build
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx
USER ${STACKABLE_USER_UID}
# Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI, so skip building this module
# Build from source to enable FUSE module, and to apply custom patches.
# Also skip building the yarn, mapreduce and minicluster modules: this will result in the modules being excluded but not all
Expand All @@ -74,27 +76,42 @@ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx
RUN <<EOF
cd "$(/stackable/patchable --images-repo-root=src checkout hadoop ${PRODUCT})"

ORIGINAL_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
NEW_VERSION=${PRODUCT}-stackable${RELEASE}

mvn versions:set -DnewVersion=${NEW_VERSION}

# Since we skip building the hadoop-pipes module, we need to set the version to the original version so it can be pulled from Maven Central
sed -e '/<artifactId>hadoop-pipes<\/artifactId>/,/<\/dependency>/ { s/<version>.*<\/version>/<version>'"$ORIGINAL_VERSION"'<\/version>/ }' -i hadoop-tools/hadoop-tools-dist/pom.xml

# Create snapshot of the source code including custom patches
tar -czf /stackable/hadoop-${PRODUCT}-src.tar.gz .
tar -czf /stackable/hadoop-${NEW_VERSION}-src.tar.gz .

mvn \
--batch-mode \
--no-transfer-progress \
clean package \
clean package install \
-Pdist,native \
-pl '!hadoop-tools/hadoop-pipes,!hadoop-yarn-project,!hadoop-mapreduce-project,!hadoop-minicluster' \
-pl '!hadoop-tools/hadoop-pipes' \
-Dhadoop.version=${NEW_VERSION} \
-Drequire.fuse=true \
-DskipTests \
-Dmaven.javadoc.skip=true

cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT}
mv hadoop-dist/target/bom.json /stackable/hadoop-${PRODUCT}/hadoop-${PRODUCT}.cdx.json
mkdir -p /stackable/patched-libs/maven/org/apache
cp -r /stackable/.m2/repository/org/apache/hadoop /stackable/patched-libs/maven/org/apache

cp -r hadoop-dist/target/hadoop-${NEW_VERSION} /stackable/hadoop-${NEW_VERSION}
sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" hadoop-dist/target/bom.json
mv hadoop-dist/target/bom.json /stackable/hadoop-${NEW_VERSION}/hadoop-${NEW_VERSION}.cdx.json

# HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves
cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin
rm -rf /build/hadoop-${PRODUCT}-src
cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${NEW_VERSION}/bin

# Remove source code
(cd .. && rm -r ${PRODUCT})

ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop
ln -s /stackable/hadoop-${NEW_VERSION} /stackable/hadoop

mv /build/fuse_dfs_wrapper /stackable/hadoop/bin

Expand All @@ -109,9 +126,9 @@ rm -rf /stackable/hadoop/share/hadoop/tools/sources/
rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar
rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar
rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar
find /stackable -name 'hadoop-minicluster-*.jar' -type f -delete
find /stackable -name 'hadoop-client-minicluster-*.jar' -type f -delete
find /stackable -name 'hadoop-*tests.jar' -type f -delete
find /stackable/hadoop -name 'hadoop-minicluster-*.jar' -type f -delete
find /stackable/hadoop -name 'hadoop-client-minicluster-*.jar' -type f -delete
find /stackable/hadoop -name 'hadoop-*tests.jar' -type f -delete
rm -rf /stackable/.m2

# Set correct groups; make sure only required artifacts for the final image are located in /stackable
Expand Down Expand Up @@ -187,7 +204,7 @@ LABEL \
description="This image is deployed by the Stackable Operator for Apache Hadoop / HDFS."

COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable /stackable
COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}-stackable${RELEASE}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}-src.tar.gz /stackable

COPY --chown=${STACKABLE_USER_UID}:0 hadoop/licenses /licenses
Expand Down
Loading