From 8bb10274454a898e0ddb1827225cb17ea9f5bebf Mon Sep 17 00:00:00 2001 From: dervoeti Date: Fri, 13 Jun 2025 15:40:51 +0200 Subject: [PATCH 1/4] feat: support custom product versions --- conf.py | 2 + druid/Dockerfile | 36 +++-- .../30.0.1/0010-Fix-CVE-2023-34455.patch | 43 ------ .../31.0.1/0010-Fix-CVE-2023-34455.patch | 43 ------ .../33.0.0/0010-Fix-CVE-2023-34455.patch | 43 ------ druid/versions.py | 3 + hadoop/Dockerfile | 41 ++++-- ...op-client-modules-before-hadoop-dist.patch | 25 ++++ .../3.3.6/0011-Remove-Hadoop-benchmark.patch | 21 +++ hbase/Dockerfile | 128 +++--------------- hbase/hbase-operator-tools/Dockerfile | 46 +++++-- hbase/hbase-operator-tools/versions.py | 14 +- hbase/hbase/Dockerfile | 104 ++++++++++++++ .../stackable/bin/export-snapshot-to-s3.env | 0 .../stackable/bin/hbase-entrypoint.sh | 0 hbase/hbase/stackable/bin/hbck2.env | 5 + ...28242-Updates-async-profiler-support.patch | 0 ...dencies-which-have-a-new-patch-updat.patch | 0 .../0003-Include-jackson-dataformat-xml.patch | 0 ...aven-plugin-to-version-2.9.1-and-twe.patch | 0 .../stackable/patches/2.6.1/patchable.toml | 0 ...28242-Updates-async-profiler-support.patch | 0 ...dencies-which-have-a-new-patch-updat.patch | 0 .../0003-Include-jackson-dataformat-xml.patch | 0 ...aven-plugin-to-version-2.9.1-and-twe.patch | 0 .../stackable/patches/2.6.2/patchable.toml | 0 .../stackable/patches/patchable.toml | 0 hbase/hbase/versions.py | 20 +++ hbase/phoenix/Dockerfile | 43 ++++-- ...egex-to-match-custom-stackable-versi.patch | 22 +++ hbase/phoenix/versions.py | 16 ++- hbase/stackable/bin/hbck2.env | 5 - hbase/versions.py | 12 +- hive/Dockerfile | 56 ++++---- spark-k8s/Dockerfile | 79 +++++++---- 35 files changed, 452 insertions(+), 355 deletions(-) delete mode 100644 druid/stackable/patches/30.0.1/0010-Fix-CVE-2023-34455.patch delete mode 100644 druid/stackable/patches/31.0.1/0010-Fix-CVE-2023-34455.patch delete mode 100644 druid/stackable/patches/33.0.0/0010-Fix-CVE-2023-34455.patch create mode 100644 hadoop/stackable/patches/3.3.6/0010-Build-hadoop-client-modules-before-hadoop-dist.patch create mode 100644 hadoop/stackable/patches/3.3.6/0011-Remove-Hadoop-benchmark.patch create mode 100644 hbase/hbase/Dockerfile rename hbase/{ => hbase}/stackable/bin/export-snapshot-to-s3.env (100%) rename hbase/{ => hbase}/stackable/bin/hbase-entrypoint.sh (100%) create mode 100755 hbase/hbase/stackable/bin/hbck2.env rename hbase/{ => hbase}/stackable/patches/2.6.1/0001-HBASE-28242-Updates-async-profiler-support.patch (100%) rename hbase/{ => hbase}/stackable/patches/2.6.1/0002-Update-all-dependencies-which-have-a-new-patch-updat.patch (100%) rename hbase/{ => hbase}/stackable/patches/2.6.1/0003-Include-jackson-dataformat-xml.patch (100%) rename hbase/{ => hbase}/stackable/patches/2.6.1/0004-Bump-cyclonedx-maven-plugin-to-version-2.9.1-and-twe.patch (100%) rename hbase/{ => hbase}/stackable/patches/2.6.1/patchable.toml (100%) rename hbase/{ => hbase}/stackable/patches/2.6.2/0001-HBASE-28242-Updates-async-profiler-support.patch (100%) rename hbase/{ => hbase}/stackable/patches/2.6.2/0002-Update-all-dependencies-which-have-a-new-patch-updat.patch (100%) rename hbase/{ => hbase}/stackable/patches/2.6.2/0003-Include-jackson-dataformat-xml.patch (100%) rename hbase/{ => hbase}/stackable/patches/2.6.2/0004-Bump-cyclonedx-maven-plugin-to-version-2.9.1-and-twe.patch (100%) rename hbase/{ => hbase}/stackable/patches/2.6.2/patchable.toml (100%) rename hbase/{ => hbase}/stackable/patches/patchable.toml (100%) create mode 100644 
hbase/hbase/versions.py create mode 100644 hbase/phoenix/stackable/patches/5.2.1/0003-Adjust-version-regex-to-match-custom-stackable-versi.patch delete mode 100755 hbase/stackable/bin/hbck2.env diff --git a/conf.py b/conf.py index 2840a9511..b5d0f7925 100644 --- a/conf.py +++ b/conf.py @@ -14,6 +14,7 @@ druid = importlib.import_module("druid.versions") hadoop = importlib.import_module("hadoop.versions") hbase = importlib.import_module("hbase.versions") +hbase_jars = importlib.import_module("hbase.hbase.versions") hbase_phoenix = importlib.import_module("hbase.phoenix.versions") hbase_opa_authorizer = importlib.import_module("hbase.hbase-opa-authorizer.versions") hbase_operator_tools = importlib.import_module("hbase.hbase-operator-tools.versions") @@ -47,6 +48,7 @@ {"name": "druid", "versions": druid.versions}, {"name": "hadoop", "versions": hadoop.versions}, {"name": "hbase", "versions": hbase.versions}, + {"name": "hbase/hbase", "versions": hbase_jars.versions}, {"name": "hbase/phoenix", "versions": hbase_phoenix.versions}, {"name": "hbase/hbase-opa-authorizer", "versions": hbase_opa_authorizer.versions}, {"name": "hbase/hbase-operator-tools", "versions": hbase_operator_tools.versions}, diff --git a/druid/Dockerfile b/druid/Dockerfile index d8dd84f4f..ba5638b9f 100644 --- a/druid/Dockerfile +++ b/druid/Dockerfile @@ -1,14 +1,18 @@ # syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7 # check=error=true +FROM stackable/image/hadoop AS hadoop-builder + FROM stackable/image/java-devel AS druid-builder ARG PRODUCT +ARG RELEASE ARG JACKSON_DATAFORMAT_XML ARG STAX2_API ARG WOODSTOX_CORE ARG AUTHORIZER ARG STACKABLE_USER_UID +ARG HADOOP # Setting this to anything other than "true" will keep the cache folders around (e.g. for Maven, NPM etc.) # This can be used to speed up builds when disk space is of no concern. @@ -35,6 +39,7 @@ WORKDIR /stackable COPY --chown=${STACKABLE_USER_UID}:0 druid/stackable/patches/patchable.toml /stackable/src/druid/stackable/patches/patchable.toml COPY --chown=${STACKABLE_USER_UID}:0 druid/stackable/patches/${PRODUCT} /stackable/src/druid/stackable/patches/${PRODUCT} +COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs # Cache mounts are owned by root by default # We need to explicitly give the uid to use which is hardcoded to "1000" in stackable-base # The cache id has to include the product version that we are building because otherwise @@ -54,24 +59,33 @@ RUN --mount=type=cache,id=maven-${PRODUCT},uid=${STACKABLE_USER_UID},target=/sta cd "$(cat /tmp/DRUID_SOURCE_DIR)" || exit 1 rm /tmp/DRUID_SOURCE_DIR +ORIGINAL_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) +NEW_VERSION="${PRODUCT}-stackable${RELEASE}" + +mvn versions:set -DnewVersion=$NEW_VERSION + +# Make Maven aware of custom Stackable libraries +cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository + # Create snapshot of the source code including custom patches -tar -czf /stackable/druid-${PRODUCT}-src.tar.gz . +tar -czf /stackable/druid-${NEW_VERSION}-src.tar.gz . mvn \ --batch-mode \ --no-transfer-progress \ clean install \ -Pdist,stackable-bundle-contrib-exts \ - -Dmaven.test.skip `# Skip test compilation` \ + -Dhadoop.compile.version=${HADOOP}-stackable${RELEASE} \ -DskipTests `# Skip test execution` \ -Dcheckstyle.skip `# Skip checkstyle checks. 
We dont care if the code is properly formatted, it just wastes time` \ -Dmaven.javadoc.skip=true `# Dont generate javadoc` \ -Dmaven.gitcommitid.skip=true `# The gitcommitid plugin cannot work with git workspaces (ie: patchable)` \ $(if [[ ${PRODUCT} != 30.* ]]; then echo --projects '!quidem-ut'; fi) `# This is just a maven module for tests. https://github.com/apache/druid/pull/16867 added https://raw.githubusercontent.com/kgyrtkirk/datasets/repo/ as a Maven repository, which fails to pull for us (Failed to execute goal on project druid-quidem-ut: Could not resolve dependencies for project org.apache.druid:druid-quidem-ut:jar:33.0.0: com.github.kgyrtkirk.datasets:kttm-nested:jar:0.1 was not found in https://build-repo.stackable.tech/repository/maven-public/). By disabling the maven module we dont pull in this weird dependency...` -mv distribution/target/apache-druid-${PRODUCT}-bin/apache-druid-${PRODUCT} /stackable/ -mv distribution/target/bom.json /stackable/apache-druid-${PRODUCT}/apache-druid-${PRODUCT}.cdx.json -rm -rf /stackable/apache-druid-${PRODUCT}-src +mv distribution/target/apache-druid-${NEW_VERSION}-bin/apache-druid-${NEW_VERSION} /stackable/ +sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" distribution/target/bom.json +mv distribution/target/bom.json /stackable/apache-druid-${NEW_VERSION}/apache-druid-${NEW_VERSION}.cdx.json +rm -rf /stackable/apache-druid-${NEW_VERSION}-src # We're removing these to make the intermediate layer smaller # This can be necessary even though it's only a builder image because the GitHub Action Runners only have very limited space available @@ -83,11 +97,11 @@ if [ "${DELETE_CACHES}" = "true" ] ; then rm -rf /stackable/.cache/* fi -# Do not remove the /stackable/apache-druid-${PRODUCT}/quickstart folder, it is needed for loading the Wikipedia +# Do not remove the /stackable/apache-druid-${NEW_VERSION}/quickstart folder, it is needed for loading the Wikipedia # testdata in kuttl tests and the getting started guide. # Install OPA authorizer extension. 
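# The Druid build above follows the same rebranding pattern as the other
# builders in this changeset; recapped here as a sketch (commented out,
# because these are not additional build steps, they already ran above):
#
#   ORIGINAL_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
#   NEW_VERSION="${PRODUCT}-stackable${RELEASE}"
#   mvn versions:set -DnewVersion=$NEW_VERSION
#   # ... run the build ...
#   # then rewrite the SBOM so it reports the upstream version again
#   # (presumably so vulnerability scanners match the real upstream release):
#   sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" distribution/target/bom.json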
-curl "https://repo.stackable.tech/repository/packages/druid/druid-opa-authorizer-${AUTHORIZER}.tar.gz" | tar -xzC /stackable/apache-druid-${PRODUCT}/extensions +curl "https://repo.stackable.tech/repository/packages/druid/druid-opa-authorizer-${AUTHORIZER}.tar.gz" | tar -xzC /stackable/apache-druid-${NEW_VERSION}/extensions # change groups chmod -R g=u /stackable @@ -122,8 +136,8 @@ LABEL io.k8s.description="${DESCRIPTION}" LABEL io.k8s.display-name="${NAME}" -COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/apache-druid-${PRODUCT} /stackable/apache-druid-${PRODUCT} -COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/druid-${PRODUCT}-src.tar.gz /stackable +COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/apache-druid-${PRODUCT}-stackable${RELEASE} /stackable/apache-druid-${PRODUCT}-stackable${RELEASE} +COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/druid-${PRODUCT}-stackable${RELEASE}-src.tar.gz /stackable COPY --chown=${STACKABLE_USER_UID}:0 druid/stackable/bin /stackable/bin COPY --chown=${STACKABLE_USER_UID}:0 druid/licenses /licenses @@ -136,7 +150,7 @@ chown ${STACKABLE_USER_UID}:0 /stackable/package_manifest.txt chmod g=u /stackable/package_manifest.txt rm -rf /var/cache/yum -ln -sf /stackable/apache-druid-${PRODUCT} /stackable/druid +ln -sf /stackable/apache-druid-${PRODUCT}-stackable${RELEASE} /stackable/druid chown -h ${STACKABLE_USER_UID}:0 stackable/druid # Force to overwrite the existing 'run-druid' @@ -145,7 +159,7 @@ chown -h ${STACKABLE_USER_UID}:0 /stackable/druid/bin/run-druid # fix missing permissions chmod -R g=u /stackable/bin -chmod g=u /stackable/apache-druid-${PRODUCT} +chmod g=u /stackable/apache-druid-${PRODUCT}-stackable${RELEASE} /stackable/druid-${PRODUCT}-stackable${RELEASE}-src.tar.gz EOF # ---------------------------------------- diff --git a/druid/stackable/patches/30.0.1/0010-Fix-CVE-2023-34455.patch b/druid/stackable/patches/30.0.1/0010-Fix-CVE-2023-34455.patch deleted file mode 100644 index fab4b0f0d..000000000 --- a/druid/stackable/patches/30.0.1/0010-Fix-CVE-2023-34455.patch +++ /dev/null @@ -1,43 +0,0 @@ -From f246bea0ec12b167b4fb49dcf775527429715f77 Mon Sep 17 00:00:00 2001 -From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> -Date: Tue, 28 Jan 2025 17:29:59 +0100 -Subject: Fix CVE-2023-34455 - -see https://github.com/stackabletech/vulnerabilities/issues/558 - -At the end of build process, Druid downloads dependencies directly from a remote -Maven repository ignoring existing patches that have been applyed locally. -These dependencies include all transitive dependencies too. -The hadoop client depends on a vulnerable version of the snappy library which -is then also downloaded even though a newer version is already on the system. - -This patch removes the vulnerable jars. 
---- - distribution/pom.xml | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/distribution/pom.xml b/distribution/pom.xml -index 08b4121287..ba08137c26 100644 ---- a/distribution/pom.xml -+++ b/distribution/pom.xml -@@ -259,6 +259,20 @@ - - - -+ -+ fix-cve-2023-34455-remove-snappy -+ package -+ -+ exec -+ -+ -+ /usr/bin/rm -+ -+ ${project.build.directory}/hadoop-dependencies/hadoop-client-api/3.3.6/snappy-java-1.1.8.2.jar -+ ${project.build.directory}/hadoop-dependencies/hadoop-client-runtime/3.3.6/snappy-java-1.1.8.2.jar -+ -+ -+ - - - diff --git a/druid/stackable/patches/31.0.1/0010-Fix-CVE-2023-34455.patch b/druid/stackable/patches/31.0.1/0010-Fix-CVE-2023-34455.patch deleted file mode 100644 index c69d2f85e..000000000 --- a/druid/stackable/patches/31.0.1/0010-Fix-CVE-2023-34455.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 90f6dd1211a4d4ced8b3a75b7549b1e68e4b6ee6 Mon Sep 17 00:00:00 2001 -From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> -Date: Tue, 28 Jan 2025 17:29:59 +0100 -Subject: Fix CVE-2023-34455 - -see https://github.com/stackabletech/vulnerabilities/issues/558 - -At the end of build process, Druid downloads dependencies directly from a remote -Maven repository ignoring existing patches that have been applyed locally. -These dependencies include all transitive dependencies too. -The hadoop client depends on a vulnerable version of the snappy library which -is then also downloaded even though a newer version is already on the system. - -This patch removes the vulnerable jars. ---- - distribution/pom.xml | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/distribution/pom.xml b/distribution/pom.xml -index a28e34bb6a..4ab7837538 100644 ---- a/distribution/pom.xml -+++ b/distribution/pom.xml -@@ -259,6 +259,20 @@ - - - -+ -+ fix-cve-2023-34455-remove-snappy -+ package -+ -+ exec -+ -+ -+ /usr/bin/rm -+ -+ ${project.build.directory}/hadoop-dependencies/hadoop-client-api/3.3.6/snappy-java-1.1.8.2.jar -+ ${project.build.directory}/hadoop-dependencies/hadoop-client-runtime/3.3.6/snappy-java-1.1.8.2.jar -+ -+ -+ - - - diff --git a/druid/stackable/patches/33.0.0/0010-Fix-CVE-2023-34455.patch b/druid/stackable/patches/33.0.0/0010-Fix-CVE-2023-34455.patch deleted file mode 100644 index 8d6b57ebb..000000000 --- a/druid/stackable/patches/33.0.0/0010-Fix-CVE-2023-34455.patch +++ /dev/null @@ -1,43 +0,0 @@ -From cd10ee4dc6abb7131f28dbf6e8aceed4af2bc7f8 Mon Sep 17 00:00:00 2001 -From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> -Date: Tue, 28 Jan 2025 17:29:59 +0100 -Subject: Fix CVE-2023-34455 - -see https://github.com/stackabletech/vulnerabilities/issues/558 - -At the end of build process, Druid downloads dependencies directly from a remote -Maven repository ignoring existing patches that have been applyed locally. -These dependencies include all transitive dependencies too. -The hadoop client depends on a vulnerable version of the snappy library which -is then also downloaded even though a newer version is already on the system. - -This patch removes the vulnerable jars. 
---- - distribution/pom.xml | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/distribution/pom.xml b/distribution/pom.xml -index c8b7e13054..f93af34434 100644 ---- a/distribution/pom.xml -+++ b/distribution/pom.xml -@@ -261,6 +261,20 @@ - - - -+ -+ fix-cve-2023-34455-remove-snappy -+ package -+ -+ exec -+ -+ -+ /usr/bin/rm -+ -+ ${project.build.directory}/hadoop-dependencies/hadoop-client-api/3.3.6/snappy-java-1.1.8.2.jar -+ ${project.build.directory}/hadoop-dependencies/hadoop-client-runtime/3.3.6/snappy-java-1.1.8.2.jar -+ -+ -+ - - - diff --git a/druid/versions.py b/druid/versions.py index c881852b7..323868f1c 100644 --- a/druid/versions.py +++ b/druid/versions.py @@ -4,6 +4,7 @@ # https://druid.apache.org/docs/30.0.1/operations/java/ "java-base": "17", "java-devel": "17", + "hadoop": "3.3.6", "authorizer": "0.7.0", }, { @@ -11,6 +12,7 @@ # https://druid.apache.org/docs/31.0.1/operations/java/ "java-base": "17", "java-devel": "17", + "hadoop": "3.3.6", "authorizer": "0.7.0", }, { @@ -18,6 +20,7 @@ # https://druid.apache.org/docs/33.0.0/operations/java/ "java-base": "17", "java-devel": "17", + "hadoop": "3.3.6", "authorizer": "0.7.0", }, ] diff --git a/hadoop/Dockerfile b/hadoop/Dockerfile index 36ac0f903..3997fb1fb 100644 --- a/hadoop/Dockerfile +++ b/hadoop/Dockerfile @@ -4,6 +4,7 @@ FROM stackable/image/java-devel AS hadoop-builder ARG PRODUCT +ARG RELEASE ARG ASYNC_PROFILER ARG JMX_EXPORTER ARG PROTOBUF @@ -66,6 +67,7 @@ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches/patchable.toml /bu COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches/${PRODUCT} /build/src/hadoop/stackable/patches/${PRODUCT} COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/fuse_dfs_wrapper /build COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx +USER ${STACKABLE_USER_UID} # Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI, so skip building this module # Build from source to enable FUSE module, and to apply custom patches. # Also skip building the yarn, mapreduce and minicluster modules: this will result in the modules being excluded but not all @@ -74,27 +76,42 @@ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx RUN <hadoop-pipes<\/artifactId>/,/<\/dependency>/ { s/.*<\/version>/'"$ORIGINAL_VERSION"'<\/version>/ }' -i hadoop-tools/hadoop-tools-dist/pom.xml + # Create snapshot of the source code including custom patches -tar -czf /stackable/hadoop-${PRODUCT}-src.tar.gz . +tar -czf /stackable/hadoop-${NEW_VERSION}-src.tar.gz . 
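# Note on the `install` goal in the Maven call below: the local repository at
# /stackable/.m2/repository is a cache mount and therefore not part of the
# image, so the freshly installed ${NEW_VERSION} artifacts are copied out to
# /stackable/patched-libs afterwards. Downstream builders pick them up like
# this (a sketch; both halves appear verbatim elsewhere in this changeset):
#
#   # producer (this stage):
#   mkdir -p /stackable/patched-libs/maven/org/apache
#   cp -r /stackable/.m2/repository/org/apache/hadoop /stackable/patched-libs/maven/org/apache
#
#   # consumer (e.g. the Druid or Hive builder):
#   COPY --from=hadoop-builder /stackable/patched-libs /stackable/patched-libs
#   cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository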
mvn \ --batch-mode \ --no-transfer-progress \ - clean package \ + clean package install \ -Pdist,native \ - -pl '!hadoop-tools/hadoop-pipes,!hadoop-yarn-project,!hadoop-mapreduce-project,!hadoop-minicluster' \ + -pl '!hadoop-tools/hadoop-pipes' \ + -Dhadoop.version=${NEW_VERSION} \ -Drequire.fuse=true \ -DskipTests \ -Dmaven.javadoc.skip=true -cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT} -mv hadoop-dist/target/bom.json /stackable/hadoop-${PRODUCT}/hadoop-${PRODUCT}.cdx.json +mkdir -p /stackable/patched-libs/maven/org/apache +cp -r /stackable/.m2/repository/org/apache/hadoop /stackable/patched-libs/maven/org/apache + +cp -r hadoop-dist/target/hadoop-${NEW_VERSION} /stackable/hadoop-${NEW_VERSION} +sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" hadoop-dist/target/bom.json +mv hadoop-dist/target/bom.json /stackable/hadoop-${NEW_VERSION}/hadoop-${NEW_VERSION}.cdx.json # HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves -cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin -rm -rf /build/hadoop-${PRODUCT}-src +cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${NEW_VERSION}/bin + +# Remove source code +(cd .. && rm -r ${PRODUCT}) -ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop +ln -s /stackable/hadoop-${NEW_VERSION} /stackable/hadoop mv /build/fuse_dfs_wrapper /stackable/hadoop/bin @@ -109,9 +126,9 @@ rm -rf /stackable/hadoop/share/hadoop/tools/sources/ rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar -find /stackable -name 'hadoop-minicluster-*.jar' -type f -delete -find /stackable -name 'hadoop-client-minicluster-*.jar' -type f -delete -find /stackable -name 'hadoop-*tests.jar' -type f -delete +find /stackable/hadoop -name 'hadoop-minicluster-*.jar' -type f -delete +find /stackable/hadoop -name 'hadoop-client-minicluster-*.jar' -type f -delete +find /stackable/hadoop -name 'hadoop-*tests.jar' -type f -delete rm -rf /stackable/.m2 # Set correct groups; make sure only required artifacts for the final image are located in /stackable @@ -187,7 +204,7 @@ LABEL \ description="This image is deployed by the Stackable Operator for Apache Hadoop / HDFS." 
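# The COPY below carries over both the versioned hadoop-${PRODUCT}-stackable${RELEASE}
# directory and the unversioned /stackable/hadoop symlink created in the builder
# stage, so other images can address Hadoop through a stable path regardless of
# the version string baked into the directory name. The pattern, in short:
#
#   ln -s /stackable/hadoop-${NEW_VERSION} /stackable/hadoop
#   # ... and a consumer copies from the stable path, e.g.:
#   COPY --from=hadoop-builder /stackable/hadoop/share/hadoop/tools/lib/ /stackable/hadoop/share/hadoop/tools/lib/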
COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable /stackable -COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar +COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}-stackable${RELEASE}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}-src.tar.gz /stackable COPY --chown=${STACKABLE_USER_UID}:0 hadoop/licenses /licenses diff --git a/hadoop/stackable/patches/3.3.6/0010-Build-hadoop-client-modules-before-hadoop-dist.patch b/hadoop/stackable/patches/3.3.6/0010-Build-hadoop-client-modules-before-hadoop-dist.patch new file mode 100644 index 000000000..45e516906 --- /dev/null +++ b/hadoop/stackable/patches/3.3.6/0010-Build-hadoop-client-modules-before-hadoop-dist.patch @@ -0,0 +1,25 @@ +From 786f8efde24951c73f3c022d0f96733b78d695ae Mon Sep 17 00:00:00 2001 +From: dervoeti +Date: Fri, 13 Jun 2025 15:38:45 +0200 +Subject: Build hadoop-client-modules before hadoop-dist + +This is needed, because dist depends on parts of client-modules. At least when specifying a custom version when building Hadoop, Maven for some reason does not build the client-modules before dist and the build fails. +--- + pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/pom.xml b/pom.xml +index de001775ab..ccb15235c8 100644 +--- a/pom.xml ++++ b/pom.xml +@@ -133,9 +133,9 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + hadoop-yarn-project + hadoop-mapreduce-project + hadoop-tools ++ hadoop-client-modules + hadoop-dist + hadoop-minicluster +- hadoop-client-modules + hadoop-build-tools + hadoop-cloud-storage-project + diff --git a/hadoop/stackable/patches/3.3.6/0011-Remove-Hadoop-benchmark.patch b/hadoop/stackable/patches/3.3.6/0011-Remove-Hadoop-benchmark.patch new file mode 100644 index 000000000..014521c5f --- /dev/null +++ b/hadoop/stackable/patches/3.3.6/0011-Remove-Hadoop-benchmark.patch @@ -0,0 +1,21 @@ +From eb7a5e6b23118d016c2d3450c7ea3a3d82e39545 Mon Sep 17 00:00:00 2001 +From: dervoeti +Date: Fri, 13 Jun 2025 15:39:07 +0200 +Subject: Remove Hadoop benchmark + +--- + hadoop-tools/pom.xml | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hadoop-tools/pom.xml b/hadoop-tools/pom.xml +index 88b3da867b..08811db902 100644 +--- a/hadoop-tools/pom.xml ++++ b/hadoop-tools/pom.xml +@@ -50,7 +50,6 @@ + hadoop-azure-datalake + hadoop-aliyun + hadoop-fs2img +- hadoop-benchmark + + + diff --git a/hbase/Dockerfile b/hbase/Dockerfile index 0d93e48b5..6f9b650c5 100644 --- a/hbase/Dockerfile +++ b/hbase/Dockerfile @@ -3,111 +3,20 @@ FROM stackable/image/hadoop AS hadoop-builder +FROM stackable/image/hbase/hbase AS hbase-builder + FROM stackable/image/hbase/phoenix AS phoenix FROM stackable/image/hbase/hbase-operator-tools AS hbase-operator-tools FROM stackable/image/hbase/hbase-opa-authorizer AS hbase-opa-authorizer -FROM stackable/image/java-devel AS hbase-builder - -ARG PRODUCT -ARG HBASE_HBASE_OPERATOR_TOOLS -ARG ASYNC_PROFILER -ARG HBASE_PROFILE -ARG HADOOP -ARG TARGETARCH -ARG TARGETOS -ARG STACKABLE_USER_UID - -# Setting this to anything other than "true" will keep the cache folders around (e.g. for Maven, NPM etc.) -# This can be used to speed up builds when disk space is of no concern. 
-ARG DELETE_CACHES="true" - -COPY hbase/licenses /licenses - -USER ${STACKABLE_USER_UID} -WORKDIR /stackable - -COPY --chown=${STACKABLE_USER_UID}:0 hbase/stackable/patches/patchable.toml /stackable/src/hbase/stackable/patches/patchable.toml -COPY --chown=${STACKABLE_USER_UID}:0 hbase/stackable/patches/${PRODUCT} /stackable/src/hbase/stackable/patches/${PRODUCT} - -# Cache mounts are owned by root by default -# We need to explicitly give the uid to use -# And every cache needs its own id, we can't share them between stages because we might delete the caches -# at the end of a run while other stages are still using it. -# While this might work in theory it didn't in practice (FileNotFound exceptions etc.) - -# The cache id has to include the product version that we are building because otherwise -# docker encounters race conditions when building multiple versions in parallel, as all -# builder containers will share the same cache and the `rm -rf` commands will fail -# with a "directory not empty" error on the first builder to finish, as other builders -# are still working in the cache directory. -RUN --mount=type=cache,id=maven-hbase-${PRODUCT},uid=${STACKABLE_USER_UID},target=/stackable/.m2/repository < /stackable/bin/export-snapshot-to-s3 +envsubst '${HBASE_HBASE}:${LIBS}' < /stackable/bin/export-snapshot-to-s3.env > /stackable/bin/export-snapshot-to-s3 chmod +x /stackable/bin/export-snapshot-to-s3 rm /stackable/bin/export-snapshot-to-s3.env @@ -148,6 +57,7 @@ ARG PRODUCT ARG RELEASE ARG HADOOP ARG HBASE_PROFILE +ARG HBASE_HBASE ARG HBASE_HBASE_OPERATOR_TOOLS ARG HBASE_HBASE_OPA_AUTHORIZER ARG HBASE_PHOENIX @@ -175,31 +85,31 @@ LABEL io.openshift.tags="ubi9,stackable,hbase,sdp,nosql" LABEL io.k8s.description="${DESCRIPTION}" LABEL io.k8s.display-name="${NAME}" -COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-builder /stackable/hbase-${PRODUCT} /stackable/hbase-${PRODUCT}/ -COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-builder /stackable/hbase-${PRODUCT}-src.tar.gz /stackable +COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-builder /stackable/hbase-${HBASE_HBASE}-stackable${RELEASE} /stackable/hbase-${HBASE_HBASE}-stackable${RELEASE}/ +COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-builder /stackable/hbase-${HBASE_HBASE}-stackable${RELEASE}-src.tar.gz /stackable COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-builder /stackable/async-profiler /stackable/async-profiler/ -COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-operator-tools /stackable/hbase-operator-tools-${HBASE_HBASE_OPERATOR_TOOLS} /stackable/hbase-operator-tools-${HBASE_HBASE_OPERATOR_TOOLS}/ -COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-operator-tools /stackable/hbase-operator-tools-${HBASE_HBASE_OPERATOR_TOOLS}-src.tar.gz /stackable +COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-operator-tools /stackable/hbase-operator-tools-${HBASE_HBASE_OPERATOR_TOOLS}-stackable${RELEASE} /stackable/hbase-operator-tools-${HBASE_HBASE_OPERATOR_TOOLS}-stackable${RELEASE}/ +COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-operator-tools /stackable/hbase-operator-tools-${HBASE_HBASE_OPERATOR_TOOLS}-stackable${RELEASE}-src.tar.gz /stackable COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-operator-tools /stackable/bin/hbck2 /stackable/bin/hbck2 -COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-operator-tools /stackable/bin/hbase-entrypoint.sh /stackable/hbase-${PRODUCT}/bin/hbase-entrypoint.sh +COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-operator-tools /stackable/bin/hbase-entrypoint.sh 
/stackable/hbase-${HBASE_HBASE}-stackable${RELEASE}/bin/hbase-entrypoint.sh COPY --chown=${STACKABLE_USER_UID}:0 --from=phoenix /stackable/phoenix /stackable/phoenix/ -COPY --chown=${STACKABLE_USER_UID}:0 --from=phoenix /stackable/phoenix-${HBASE_PHOENIX}-src.tar.gz /stackable +COPY --chown=${STACKABLE_USER_UID}:0 --from=phoenix /stackable/phoenix-${HBASE_PHOENIX}-stackable${RELEASE}-src.tar.gz /stackable COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-s3-builder /stackable/bin/export-snapshot-to-s3 /stackable/bin/export-snapshot-to-s3 COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-s3-builder /stackable/hadoop/share/hadoop/tools/lib/ /stackable/hadoop/share/hadoop/tools/lib/ # Copy the dependencies from Hadoop which are required for the Azure Data Lake -# Storage (ADLS) to /stackable/hbase-${PRODUCT}/lib which is on the classpath. +# Storage (ADLS) to /stackable/hbase-${HBASE_HBASE}/lib which is on the classpath. # hadoop-azure-${HADOOP}.jar contains the AzureBlobFileSystem which is required # by hadoop-common-${HADOOP}.jar if the scheme of a file system is "abfs://". COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder \ - /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP}.jar \ - /stackable/hbase-${PRODUCT}/lib/ + /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP}-stackable${RELEASE}.jar \ + /stackable/hbase-${HBASE_HBASE}-stackable${RELEASE}/lib/ COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-opa-authorizer /stackable/hbase-opa-authorizer-${HBASE_HBASE_OPA_AUTHORIZER}-src.tar.gz /stackable -COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-opa-authorizer /stackable/hbase-opa-authorizer/target/hbase-opa-authorizer*.jar /stackable/hbase-${PRODUCT}/lib +COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-opa-authorizer /stackable/hbase-opa-authorizer/target/hbase-opa-authorizer*.jar /stackable/hbase-${HBASE_HBASE}-stackable${RELEASE}/lib RUN < /stackable/bin/hbck2 +PATCHED_HBASE_OPERATOR_TOOLS_VERSION=${NEW_VERSION} +export PATCHED_HBASE_VERSION FULL_HBASE_OPERATOR_TOOLS_VERSION PATCHED_HBASE_OPERATOR_TOOLS_VERSION +envsubst '${PATCHED_HBASE_VERSION}:${FULL_HBASE_OPERATOR_TOOLS_VERSION}:${PATCHED_HBASE_OPERATOR_TOOLS_VERSION}' < /stackable/bin/hbck2.env > /stackable/bin/hbck2 chmod +x /stackable/bin/hbck2 rm /stackable/bin/hbck2.env diff --git a/hbase/hbase-operator-tools/versions.py b/hbase/hbase-operator-tools/versions.py index 22a6aaf41..9fbee3e66 100644 --- a/hbase/hbase-operator-tools/versions.py +++ b/hbase/hbase-operator-tools/versions.py @@ -1,15 +1,19 @@ versions = [ { - "product": "1.2.0", - "hbase_thirdparty": "4.1.5", - "hbase_version": "2.4.18", + "product": "1.3.0-fd5a5fb-hbase2.6.1", + "hbase_operator_tools_version": "1.3.0-fd5a5fb", + "hadoop": "3.3.6", + "hbase_thirdparty": "4.1.9", + "hbase/hbase": "2.6.1", "java-devel": "11", "delete_caches": "true", }, { - "product": "1.3.0-fd5a5fb", + "product": "1.3.0-fd5a5fb-hbase2.6.2", + "hbase_operator_tools_version": "1.3.0-fd5a5fb", + "hadoop": "3.4.1", "hbase_thirdparty": "4.1.9", - "hbase_version": "2.6.1", + "hbase/hbase": "2.6.2", "java-devel": "11", "delete_caches": "true", }, diff --git a/hbase/hbase/Dockerfile b/hbase/hbase/Dockerfile new file mode 100644 index 000000000..62e347260 --- /dev/null +++ b/hbase/hbase/Dockerfile @@ -0,0 +1,104 @@ +FROM stackable/image/hadoop AS hadoop-builder + +FROM stackable/image/java-devel AS hbase-builder + +ARG PRODUCT +ARG RELEASE +ARG ASYNC_PROFILER +ARG HADOOP +ARG TARGETARCH +ARG TARGETOS +ARG STACKABLE_USER_UID + +# Setting this to anything 
other than "true" will keep the cache folders around (e.g. for Maven, NPM etc.) +# This can be used to speed up builds when disk space is of no concern. +ARG DELETE_CACHES="true" + +COPY hbase/licenses /licenses + +USER ${STACKABLE_USER_UID} +WORKDIR /stackable + +COPY --chown=${STACKABLE_USER_UID}:0 hbase/hbase/stackable/patches/patchable.toml /stackable/src/hbase/hbase/stackable/patches/patchable.toml +COPY --chown=${STACKABLE_USER_UID}:0 hbase/hbase/stackable/patches/${PRODUCT} /stackable/src/hbase/hbase/stackable/patches/${PRODUCT} + +COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs +# Cache mounts are owned by root by default +# We need to explicitly give the uid to use +# And every cache needs its own id, we can't share them between stages because we might delete the caches +# at the end of a run while other stages are still using it. +# While this might work in theory it didn't in practice (FileNotFound exceptions etc.) + +# The cache id has to include the product version that we are building because otherwise +# docker encounters race conditions when building multiple versions in parallel, as all +# builder containers will share the same cache and the `rm -rf` commands will fail +# with a "directory not empty" error on the first builder to finish, as other builders +# are still working in the cache directory. +RUN --mount=type=cache,id=maven-hbase-${PRODUCT},uid=${STACKABLE_USER_UID},target=/stackable/.m2/repository < +Date: Tue, 15 Apr 2025 11:11:36 +0200 +Subject: Adjust version regex to match custom stackable versions + +--- + phoenix-core-client/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/phoenix-core-client/pom.xml b/phoenix-core-client/pom.xml +index cbbebc9d5..ae0b37387 100644 +--- a/phoenix-core-client/pom.xml ++++ b/phoenix-core-client/pom.xml +@@ -48,7 +48,7 @@ + import java.util.regex.Pattern; + import java.lang.Integer; + +- versionPattern = Pattern.compile("(\\d+)\\.(\\d+)\\.(\\d+)[^.]*$"); ++ versionPattern = Pattern.compile("(\\d+)\\.(\\d+)\\.(\\d+)[a-z0-9\\-\\.]*$"); + versionMatcher = versionPattern.matcher("${hbase.version}"); + versionMatcher.find(); + diff --git a/hbase/phoenix/versions.py b/hbase/phoenix/versions.py index d5949fc41..e66c11f00 100644 --- a/hbase/phoenix/versions.py +++ b/hbase/phoenix/versions.py @@ -1,8 +1,18 @@ versions = [ { - "product": "5.2.1", - "hbase_version": "2.6.1", - "hadoop_version": "3.3.6", + "product": "5.2.1-hbase2.6.1", + "phoenix_version": "5.2.1", + "hbase/hbase": "2.6.1", + "hadoop": "3.3.6", + "java-devel": "11", + "hbase_profile": "2.6", + "delete_caches": "true", + }, + { + "product": "5.2.1-hbase2.6.2", + "phoenix_version": "5.2.1", + "hbase/hbase": "2.6.2", + "hadoop": "3.4.1", "java-devel": "11", "hbase_profile": "2.6", "delete_caches": "true", diff --git a/hbase/stackable/bin/hbck2.env b/hbase/stackable/bin/hbck2.env deleted file mode 100755 index 5049d0db8..000000000 --- a/hbase/stackable/bin/hbck2.env +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -exec /stackable/hbase-${PRODUCT}/bin/hbase hbck \ - -j /stackable/hbase-operator-tools-${HBASE_OPERATOR_TOOLS}/hbase-hbck2/hbase-hbck2-${HBASE_OPERATOR_TOOLS}.jar \ - "$@" diff --git a/hbase/versions.py b/hbase/versions.py index b787609c4..c2f9fdfbf 100644 --- a/hbase/versions.py +++ b/hbase/versions.py @@ -3,26 +3,26 @@ # hbase-thirdparty is used to build the hbase-operator-tools and should be set to the version defined in the POM of HBase. 
{ "product": "2.6.1", - "hbase/hbase-operator-tools": "1.3.0-fd5a5fb", - "hbase/phoenix": "5.2.1", + "hbase/hbase": "2.6.1", + "hbase/hbase-operator-tools": "1.3.0-fd5a5fb-hbase2.6.1", + "hbase/phoenix": "5.2.1-hbase2.6.1", "hbase/hbase-opa-authorizer": "0.1.0", # only for HBase 2.6.1 "hadoop": "3.3.6", "java-base": "11", "java-devel": "11", "hbase_profile": "2.6", - "async_profiler": "2.9", "delete_caches": "true", }, { "product": "2.6.2", - "hbase/hbase-operator-tools": "1.3.0-fd5a5fb", - "hbase/phoenix": "5.2.1", + "hbase/hbase": "2.6.2", + "hbase/hbase-operator-tools": "1.3.0-fd5a5fb-hbase2.6.2", + "hbase/phoenix": "5.2.1-hbase2.6.2", "hbase/hbase-opa-authorizer": "0.1.0", # only for HBase 2.6.1 "hadoop": "3.4.1", "java-base": "11", "java-devel": "11", "hbase_profile": "2.6", - "async_profiler": "2.9", "delete_caches": "true", }, ] diff --git a/hive/Dockerfile b/hive/Dockerfile index 0fb56e4b8..9dc52d8b3 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -5,12 +5,14 @@ FROM stackable/image/hadoop AS hadoop-builder FROM stackable/image/java-devel AS hive-builder + # Apache Hive up to 4.0.x(!) officially requires Java 8 (there is no distinction between building and running). # As of 2024-04-15 we for sure need Java 8 for building, but we used a Java 11 runtime for months now without any problems. # As we got weird TLS errors (https://stackable-workspace.slack.com/archives/C031A5BEFS7/p1713185172557459) with a # Java 8 runtime we bumped the Runtime to Java 11 again. ARG PRODUCT +ARG RELEASE ARG HADOOP ARG JMX_EXPORTER ARG AWS_JAVA_SDK_BUNDLE @@ -28,7 +30,8 @@ COPY --chown=${STACKABLE_USER_UID}:0 hive/stackable/patches/${PRODUCT} /stackabl # It is useful to see which version of Hadoop is used at a glance # Therefore the use of the full name here # TODO: Do we really need all of Hadoop in here? -COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop /stackable/hadoop-${HADOOP} +COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop /stackable/hadoop-${HADOOP}-stackable${RELEASE} +COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs USER ${STACKABLE_USER_UID} WORKDIR /stackable @@ -39,26 +42,33 @@ RUN --mount=type=cache,id=maven-hive-${PRODUCT},uid=${STACKABLE_USER_UID},target BUILD_SRC_DIR="$(/stackable/patchable --images-repo-root=src checkout hive ${PRODUCT})" cd "$BUILD_SRC_DIR" +# Make Maven aware of custom Stackable libraries +cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository + +ORIGINAL_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) +NEW_VERSION="${PRODUCT}-stackable${RELEASE}" + +mvn versions:set -DnewVersion=$NEW_VERSION -DartifactId=* -DgroupId=* + # Create snapshot of the source code including custom patches -tar -czf /stackable/hive-${PRODUCT}-src.tar.gz . +tar -czf /stackable/hive-${NEW_VERSION}-src.tar.gz . 
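# Two details worth spelling out for the build below (a sketch, no extra steps):
#
#   # versions:set was invoked above with wildcard filters so that every
#   # module in the reactor, not only the parent POM, moves to ${NEW_VERSION}:
#   mvn versions:set -DnewVersion=$NEW_VERSION -DartifactId=* -DgroupId=*
#
#   # the Hive 4.x branch then builds against the patched Hadoop artifacts
#   # that were seeded into the local repository from /stackable/patched-libs:
#   mvn clean package -DskipTests -Dhadoop.version=${HADOOP}-stackable${RELEASE}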
if [[ "${PRODUCT}" == "3.1.3" ]] ; then mvn --batch-mode --no-transfer-progress clean package -DskipTests --projects standalone-metastore - mv standalone-metastore/target/apache-hive-metastore-${PRODUCT}-bin/apache-hive-metastore-${PRODUCT}-bin /stackable - mv standalone-metastore/target/bom.json /stackable/apache-hive-metastore-${PRODUCT}-bin/apache-hive-metastore-${PRODUCT}.cdx.json + mv standalone-metastore/target/apache-hive-metastore-${NEW_VERSION}-bin/apache-hive-metastore-${NEW_VERSION}-bin /stackable + mv standalone-metastore/target/bom.json /stackable/apache-hive-metastore-${NEW_VERSION}-bin/apache-hive-metastore-${NEW_VERSION}.cdx.json else ( # https://issues.apache.org/jira/browse/HIVE-20451 switched the metastore server packaging starting with 4.0.0 - cd standalone-metastore - mvn --batch-mode --no-transfer-progress clean package -DskipTests --projects metastore-server + mvn --batch-mode --no-transfer-progress clean package -DskipTests -Dhadoop.version=${HADOOP}-stackable${RELEASE} # We only seem to get a .tar.gz archive, so let's extract that to the correct location - tar --extract --directory=/stackable -f metastore-server/target/apache-hive-standalone-metastore-server-${PRODUCT}-bin.tar.gz - mv metastore-server/target/bom.json /stackable/apache-hive-metastore-${PRODUCT}-bin/apache-hive-metastore-${PRODUCT}.cdx.json + tar --extract --directory=/stackable -f standalone-metastore/metastore-server/target/apache-hive-standalone-metastore-server-${NEW_VERSION}-bin.tar.gz + mv standalone-metastore/metastore-server/target/bom.json /stackable/apache-hive-metastore-${NEW_VERSION}-bin/apache-hive-metastore-${NEW_VERSION}.cdx.json # TODO: Remove once the fix https://github.com/apache/hive/pull/5419 is merged and released # The schemaTool.sh is still pointing to the class location from Hive < 4.0.0, it seems like it was forgotten to update it - sed -i -e 's/CLASS=org.apache.hadoop.hive.metastore.tools.MetastoreSchemaTool/CLASS=org.apache.hadoop.hive.metastore.tools.schematool.MetastoreSchemaTool/' /stackable/apache-hive-metastore-${PRODUCT}-bin/bin/ext/schemaTool.sh + sed -i -e 's/CLASS=org.apache.hadoop.hive.metastore.tools.MetastoreSchemaTool/CLASS=org.apache.hadoop.hive.metastore.tools.schematool.MetastoreSchemaTool/' /stackable/apache-hive-metastore-${NEW_VERSION}-bin/bin/ext/schemaTool.sh ) fi @@ -74,17 +84,17 @@ ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/j # This way the build will fail should one of the files not be available anymore in a later Hadoop version! # Add S3 Support for Hive (support for s3a://) -cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/hadoop-aws-${HADOOP}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ +cp /stackable/hadoop-${HADOOP}-stackable${RELEASE}/share/hadoop/tools/lib/hadoop-aws-${HADOOP}-stackable${RELEASE}.jar /stackable/apache-hive-metastore-${PRODUCT}-stackable${RELEASE}-bin/lib/ # According to https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html, the jar filename has changed from # aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar to bundle-${AWS_JAVA_SDK_BUNDLE}.jar. 
In future, you might need to do: -# cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ -cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ +# cp /stackable/hadoop-${HADOOP}-stackable${RELEASE}/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE}.jar /stackable/apache-hive-metastore-${PRODUCT}-stackable${RELEASE}-bin/lib/ +cp /stackable/hadoop-${HADOOP}-stackable${RELEASE}/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar /stackable/apache-hive-metastore-${PRODUCT}-stackable${RELEASE}-bin/lib/ # Add Azure ABFS support (support for abfs://) -cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/hadoop-azure-${HADOOP}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ -cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ -cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ +cp /stackable/hadoop-${HADOOP}-stackable${RELEASE}/share/hadoop/tools/lib/hadoop-azure-${HADOOP}-stackable${RELEASE}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/ +cp /stackable/hadoop-${HADOOP}-stackable${RELEASE}/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/ +cp /stackable/hadoop-${HADOOP}-stackable${RELEASE}/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/ # We're removing these to make the intermediate layer smaller # This can be necessary even though it's only a builder image because the GitHub Action Runners only have very limited space available @@ -133,13 +143,13 @@ LABEL io.k8s.display-name="${NAME}" WORKDIR /stackable -COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/apache-hive-metastore-${PRODUCT}-bin -COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/hive-${PRODUCT}-src.tar.gz /stackable -COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/hadoop-${HADOOP} /stackable/hadoop-${HADOOP} -COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-${HADOOP}-src.tar.gz /stackable +COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/apache-hive-metastore-${PRODUCT}-stackable${RELEASE}-bin /stackable/apache-hive-metastore-${PRODUCT}-stackable${RELEASE}-bin +COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/hive-${PRODUCT}-stackable${RELEASE}-src.tar.gz /stackable +COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/hadoop-${HADOOP}-stackable${RELEASE} /stackable/hadoop-${HADOOP}-stackable${RELEASE} +COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-${HADOOP}-stackable${RELEASE}-src.tar.gz /stackable COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/jmx /stackable/jmx COPY --chown=${STACKABLE_USER_UID}:0 hive/stackable/jmx /stackable/jmx -COPY --chown=${STACKABLE_USER_UID}:0 hive/stackable/bin/start-metastore /stackable/apache-hive-metastore-${PRODUCT}-bin/bin +COPY --chown=${STACKABLE_USER_UID}:0 hive/stackable/bin/start-metastore /stackable/apache-hive-metastore-${PRODUCT}-stackable${RELEASE}-bin/bin COPY hive/licenses /licenses @@ -151,12 
+161,12 @@ chown ${STACKABLE_USER_UID}:0 /stackable/package_manifest.txt chmod g=u /stackable/package_manifest.txt rm -rf /var/cache/yum -chmod g=u /stackable/apache-hive-metastore-${PRODUCT}-bin/bin/start-metastore +chmod g=u /stackable/apache-hive-metastore-${PRODUCT}-stackable${RELEASE}-bin/bin/start-metastore -ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/hive-metastore +ln -s /stackable/apache-hive-metastore-${PRODUCT}-stackable${RELEASE}-bin /stackable/hive-metastore chown -h ${STACKABLE_USER_UID}:0 /stackable/hive-metastore chmod g=u /stackable/hive-metastore -ln -s /stackable/hadoop-${HADOOP} /stackable/hadoop +ln -s /stackable/hadoop-${HADOOP}-stackable${RELEASE} /stackable/hadoop chown -h ${STACKABLE_USER_UID}:0 /stackable/hadoop chmod g=u /stackable/hadoop chmod g=u /stackable/*-src.tar.gz diff --git a/spark-k8s/Dockerfile b/spark-k8s/Dockerfile index 628acf5cc..052c99903 100644 --- a/spark-k8s/Dockerfile +++ b/spark-k8s/Dockerfile @@ -11,6 +11,7 @@ FROM stackable/image/hbase AS hbase-builder FROM stackable/image/java-devel AS spark-source-builder ARG PRODUCT +ARG RELEASE ARG STACKABLE_USER_UID WORKDIR /stackable @@ -21,9 +22,13 @@ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/stackable/patches/${PRODUCT} /sta RUN <>> Build spark # Compiling the tests takes a lot of time, so we skip them @@ -148,28 +161,37 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \ # # This will download it's own version of maven because the UBI version is too old: # 134.0 [ERROR] Detected Maven Version: 3.6.3 is not in the allowed range [3.8.8,) -RUN export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g" \ +RUN < Date: Fri, 13 Jun 2025 15:44:12 +0200 Subject: [PATCH 2/4] chore: changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 37ecd583b..1f06b24e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -82,6 +82,7 @@ All notable changes to this project will be documented in this file. - vector: Bump to `0.47.0` ([#1152]). - zookeeper: backport ZOOKEEPER-4846, ZOOKEEPER-4921, ZOOKEEPER-4925 into Zookeeper 3.9.3 ([#1150]). - testing-tools: Update base image ([#1165]). +- use custom product versions for Hadoop, HBase, Phoenix, hbase-operator-tools, Druid, Hive and Spark ([#1173]). ### Fixed @@ -190,6 +191,7 @@ All notable changes to this project will be documented in this file. 
[#1163]: https://github.com/stackabletech/docker-images/pull/1163 [#1165]: https://github.com/stackabletech/docker-images/pull/1165 [#1161]: https://github.com/stackabletech/docker-images/pull/1161 +[#1173]: https://github.com/stackabletech/docker-images/pull/1173 ## [25.3.0] - 2025-03-21 From 32bd7a1583c7f37c590968b108e86b00081f6bb4 Mon Sep 17 00:00:00 2001 From: dervoeti Date: Tue, 17 Jun 2025 11:34:48 +0200 Subject: [PATCH 3/4] chore: remove unnecessary variables --- hive/Dockerfile | 1 - spark-k8s/Dockerfile | 1 - 2 files changed, 2 deletions(-) diff --git a/hive/Dockerfile b/hive/Dockerfile index 9dc52d8b3..dc5bbc0c1 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -45,7 +45,6 @@ cd "$BUILD_SRC_DIR" # Make Maven aware of custom Stackable libraries cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository -ORIGINAL_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) NEW_VERSION="${PRODUCT}-stackable${RELEASE}" mvn versions:set -DnewVersion=$NEW_VERSION -DartifactId=* -DgroupId=* diff --git a/spark-k8s/Dockerfile b/spark-k8s/Dockerfile index bf4cfe8df..c62a7c03b 100644 --- a/spark-k8s/Dockerfile +++ b/spark-k8s/Dockerfile @@ -61,7 +61,6 @@ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patche RUN < Date: Wed, 18 Jun 2025 10:07:02 +0200 Subject: [PATCH 4/4] fix: hbase snapshot export script --- hbase/Dockerfile | 3 ++- hbase/hbase/stackable/bin/export-snapshot-to-s3.env | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hbase/Dockerfile b/hbase/Dockerfile index 6f9b650c5..cf255a29d 100644 --- a/hbase/Dockerfile +++ b/hbase/Dockerfile @@ -18,6 +18,7 @@ FROM stackable/image/java-devel AS hadoop-s3-builder ARG PRODUCT ARG RELEASE ARG HADOOP +ARG HBASE_HBASE ARG STACKABLE_USER_UID USER ${STACKABLE_USER_UID} @@ -42,7 +43,7 @@ export LIBS=$(find /stackable/hadoop/share/hadoop -name '*.jar' -printf '%p:' | # The variable names are intentionally passed to envsubst in single-quotes, # so that they are not expanded. Disabling ShellCheck rules in a Dockerfile # does not work, so please ignore the according warning (SC2016). -envsubst '${HBASE_HBASE}:${LIBS}' < /stackable/bin/export-snapshot-to-s3.env > /stackable/bin/export-snapshot-to-s3 +envsubst '${HBASE_HBASE}:${RELEASE}:${LIBS}' < /stackable/bin/export-snapshot-to-s3.env > /stackable/bin/export-snapshot-to-s3 chmod +x /stackable/bin/export-snapshot-to-s3 rm /stackable/bin/export-snapshot-to-s3.env diff --git a/hbase/hbase/stackable/bin/export-snapshot-to-s3.env b/hbase/hbase/stackable/bin/export-snapshot-to-s3.env index 8eb11ff64..f0f65276a 100755 --- a/hbase/hbase/stackable/bin/export-snapshot-to-s3.env +++ b/hbase/hbase/stackable/bin/export-snapshot-to-s3.env @@ -61,7 +61,7 @@ sed --in-place '/<\/configuration>/{ # `hbase snapshot export` which results in the error # 'No FileSystem for scheme "hdfs"'. Passsing the argument # `--internal-classpath` solves this problem. -/stackable/hbase-${PRODUCT}/bin/hbase \ +/stackable/hbase-${HBASE_HBASE}-stackable${RELEASE}/bin/hbase \ --config "$CONF_DIR" \ --internal-classpath \ snapshot export "$@"
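Throughout these patches, scripts are rendered from templates with envsubst and
an explicit shell-format string: only the variables named in that string are
substituted, and every other "$" reference survives verbatim into the generated
script, which is why ${RELEASE} had to be added to the list above once the
template began using it. A minimal standalone sketch (file names and values
illustrative):

    export HBASE_HBASE="2.6.2" RELEASE="0"
    export LIBS="/stackable/hadoop/share/hadoop/tools/lib/example.jar"
    # HBASE_HBASE, RELEASE and LIBS are expanded; runtime-only references
    # such as "$CONF_DIR" are left untouched in the output.
    envsubst '${HBASE_HBASE}:${RELEASE}:${LIBS}' \
      < export-snapshot-to-s3.env \
      > export-snapshot-to-s3
    chmod +x export-snapshot-to-s3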