Skip to content

Commit

Permalink
Support libhdfs in velox
Browse files Browse the repository at this point in the history
  • Loading branch information
JkSelf committed Apr 12, 2024
1 parent d9454d6 commit 508df6b
Show file tree
Hide file tree
Showing 14 changed files with 1,033 additions and 8 deletions.
26 changes: 26 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ option(VELOX_ENABLE_S3 "Build S3 Connector" OFF)
option(VELOX_ENABLE_GCS "Build GCS Connector" OFF)
option(VELOX_ENABLE_ABFS "Build Abfs Connector" OFF)
option(VELOX_ENABLE_HDFS "Build Hdfs Connector" OFF)
option(VELOX_ENABLE_HDFS3 "Build Hdfs3 Connector" OFF)
option(VELOX_ENABLE_PARQUET "Enable Parquet support" OFF)
option(VELOX_ENABLE_ARROW "Enable Arrow support" OFF)
option(VELOX_ENABLE_REMOTE_FUNCTIONS "Enable remote function support" OFF)
Expand Down Expand Up @@ -237,6 +238,31 @@ if(VELOX_ENABLE_ABFS)
endif()

if(VELOX_ENABLE_HDFS)
if(DEFINED ENV{HADOOP_HOME} AND DEFINED ENV{JAVA_HOME})
message(STATUS "HADOOP_HOME is set to $ENV{HADOOP_HOME}")
message(STATUS "JAVA_HOME is set to $ENV{JAVA_HOME}")

set(HADOOP_HOME $ENV{HADOOP_HOME})
set(JAVA_HOME $ENV{JAVA_HOME})
else()
message(FATAL_ERROR "Both HADOOP_HOME and JAVA_HOME need to be set when enabling libhdfs. Please set the HADOOP_HOME and JAVA_HOME environment variables.")
# You can stop the configuration process if both variables are required
endif()

find_library(
LIBHDFS
NAMES libhdfs.so
HINTS "${HADOOP_HOME}/lib/native" REQUIRED)

find_library(
LIBJVM
NAMES libjvm.so
HINTS "${JAVA_HOME}/jre/lib/amd64/server/" REQUIRED)

add_definitions(-DVELOX_ENABLE_HDFS)
endif()

if(VELOX_ENABLE_HDFS3)
find_library(
LIBHDFS3
NAMES libhdfs3.so libhdfs3.dylib
Expand Down
4 changes: 2 additions & 2 deletions velox/connectors/hive/HiveConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#ifdef VELOX_ENABLE_GCS
#include "velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.h" // @manual
#endif
#ifdef VELOX_ENABLE_HDFS3
#if defined(VELOX_ENABLE_HDFS) || defined(VELOX_ENABLE_HDFS3)
#include "velox/connectors/hive/storage_adapters/hdfs/RegisterHdfsFileSystem.h" // @manual
#endif
#ifdef VELOX_ENABLE_S3
Expand Down Expand Up @@ -144,7 +144,7 @@ void HiveConnectorFactory::initialize() {
#ifdef VELOX_ENABLE_S3
filesystems::registerS3FileSystem();
#endif
#ifdef VELOX_ENABLE_HDFS3
#if defined(VELOX_ENABLE_HDFS) || defined(VELOX_ENABLE_HDFS3)
filesystems::registerHdfsFileSystem();
#endif
#ifdef VELOX_ENABLE_GCS
Expand Down
15 changes: 14 additions & 1 deletion velox/connectors/hive/storage_adapters/hdfs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,22 @@
add_library(velox_hdfs RegisterHdfsFileSystem.cpp)

if(VELOX_ENABLE_HDFS)
target_sources(velox_hdfs PRIVATE HdfsFileSystem.cpp HdfsReadFile.cpp
HdfsWriteFile.cpp HdfsInternal.cpp)
target_link_libraries(velox_hdfs Folly::folly ${LIBHDFS} ${LIBJVM} xsimd
arrow)

target_include_directories(velox_hdfs PRIVATE ${HADOOP_HOME}/include/)

if(${VELOX_BUILD_TESTING})
add_subdirectory(tests)
endif()
endif()

if(VELOX_ENABLE_HDFS3)
target_sources(velox_hdfs PRIVATE HdfsFileSystem.cpp HdfsReadFile.cpp
HdfsWriteFile.cpp)
target_link_libraries(velox_hdfs Folly::folly ${LIBHDFS3} xsimd)
target_link_libraries(velox_hdfs Folly::folly ${LIBHDFS3} xsimd arrow)

if(${VELOX_BUILD_TESTING})
add_subdirectory(tests)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@
* limitations under the License.
*/
#include "velox/connectors/hive/storage_adapters/hdfs/HdfsFileSystem.h"
#ifdef VELOX_ENABLE_HDFS3
#include <hdfs/hdfs.h>
#elif VELOX_ENABLE_HDFS
#include "velox/connectors/hive/storage_adapters/hdfs/HdfsInternal.h"
#endif
#include <mutex>
#include "velox/connectors/hive/storage_adapters/hdfs/HdfsReadFile.h"
#include "velox/connectors/hive/storage_adapters/hdfs/HdfsWriteFile.h"
Expand All @@ -32,11 +36,13 @@ class HdfsFileSystem::Impl {
hdfsBuilderSetNameNodePort(builder, atoi(endpoint.port.data()));
hdfsClient_ = hdfsBuilderConnect(builder);
hdfsFreeBuilder(builder);
#ifdef VELOX_ENABLE_HDFS3
VELOX_CHECK_NOT_NULL(
hdfsClient_,
"Unable to connect to HDFS: {}, got error: {}.",
endpoint.identity(),
hdfsGetLastError())
#endif
}

~Impl() {
Expand Down
Loading

0 comments on commit 508df6b

Please sign in to comment.