diff --git a/.github/workflows/checkstyle.yml b/.github/workflows/checkstyle.yml index 24ffe1695b53..2ce22c7c1f78 100644 --- a/.github/workflows/checkstyle.yml +++ b/.github/workflows/checkstyle.yml @@ -8,7 +8,7 @@ jobs: steps: - name: checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v1 @@ -16,7 +16,7 @@ jobs: node-version: '10.11.0' - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} @@ -24,7 +24,7 @@ jobs: ${{ runner.os }}-maven- - name: Cache local Go modules - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/go/pkg/mod key: ${{ runner.os }}-gomod-${{ hashFiles('**/go.mod') }} @@ -41,7 +41,7 @@ jobs: - name: Archive artifacts continue-on-error: true - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: artifact diff --git a/.github/workflows/fuse_integration_tests.yml b/.github/workflows/fuse_integration_tests.yml index ea9422bf8ded..25631570da8e 100644 --- a/.github/workflows/fuse_integration_tests.yml +++ b/.github/workflows/fuse_integration_tests.yml @@ -18,7 +18,7 @@ jobs: steps: - name: checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v1 @@ -26,13 +26,13 @@ jobs: node-version: '10.11.0' - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-java${{ matrix.java }}-${{ hashFiles('**/pom.xml') }} - name: Cache local Go modules - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/go/pkg/mod key: ${{ runner.os }}-gomod-${{ hashFiles('**/go.mod') }} @@ -50,7 +50,7 @@ jobs: - name: Archive artifacts continue-on-error: true - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: artifact diff --git a/.github/workflows/java8_integration_tests.yml b/.github/workflows/java8_integration_tests.yml index c10b5f648adc..454428cad849 100644 --- a/.github/workflows/java8_integration_tests.yml +++ b/.github/workflows/java8_integration_tests.yml @@ -31,7 +31,7 @@ jobs: steps: - name: checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v1 @@ -39,13 +39,13 @@ jobs: node-version: '10.11.0' - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-java8-${{ hashFiles('**/pom.xml') }} - name: Cache local Go modules - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/go/pkg/mod key: ${{ runner.os }}-gomod-${{ hashFiles('**/go.mod') }} @@ -63,7 +63,7 @@ jobs: - name: Archive artifacts continue-on-error: true - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: artifact diff --git a/.github/workflows/java8_integration_tests_ft.yml b/.github/workflows/java8_integration_tests_ft.yml index 1a22af092bbb..894854235706 100644 --- a/.github/workflows/java8_integration_tests_ft.yml +++ b/.github/workflows/java8_integration_tests_ft.yml @@ -21,7 +21,7 @@ jobs: steps: - name: checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v1 @@ -29,13 +29,13 @@ jobs: node-version: '10.11.0' - name: Cache local Maven 
repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-java8-${{ hashFiles('**/pom.xml') }} - name: Cache local Go modules - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/go/pkg/mod key: ${{ runner.os }}-gomod-${{ hashFiles('**/go.mod') }} @@ -55,7 +55,7 @@ jobs: - name: Archive artifacts continue-on-error: true - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: artifact diff --git a/.github/workflows/java8_integration_tests_webui.yml b/.github/workflows/java8_integration_tests_webui.yml index 85c4ba987062..cc82ee0b2a93 100644 --- a/.github/workflows/java8_integration_tests_webui.yml +++ b/.github/workflows/java8_integration_tests_webui.yml @@ -19,7 +19,7 @@ jobs: steps: - name: checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v1 @@ -27,13 +27,13 @@ jobs: node-version: '10.11.0' - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-java8-${{ hashFiles('**/pom.xml') }} - name: Cache local Go modules - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/go/pkg/mod key: ${{ runner.os }}-gomod-${{ hashFiles('**/go.mod') }} @@ -51,7 +51,7 @@ jobs: - name: Archive artifacts continue-on-error: true - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: artifact diff --git a/.github/workflows/java8_unit_tests.yml b/.github/workflows/java8_unit_tests.yml index 833688c22806..062ae2df3524 100644 --- a/.github/workflows/java8_unit_tests.yml +++ b/.github/workflows/java8_unit_tests.yml @@ -21,7 +21,7 @@ jobs: steps: - name: checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v1 @@ -29,13 +29,13 @@ jobs: node-version: '10.11.0' - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-java8-${{ hashFiles('**/pom.xml') }} - name: Cache local Go modules - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/go/pkg/mod key: ${{ runner.os }}-gomod-${{ hashFiles('**/go.mod') }} @@ -53,7 +53,7 @@ jobs: - name: Archive artifacts continue-on-error: true - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: artifact diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml new file mode 100644 index 000000000000..72ba9f046749 --- /dev/null +++ b/.github/workflows/stale.yaml @@ -0,0 +1,36 @@ +name: "Mark stale issues and PRs" +on: + schedule: + # Run the stalebot every day at 3pm UTC + - cron: "00 15 * * *" + +permissions: + contents: read + +jobs: + stale: + permissions: + issues: write # for writing stale message + pull-requests: write # for writing stale message + runs-on: ubuntu-22.04 + if: github.repository == 'alluxio/alluxio' + steps: + - uses: actions/stale@v6 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + ascending: true # old issues/PRs first + operations-per-run: 1000 # default is 30, increase to handle more issues/PRs + days-before-stale: 30 + days-before-close: -1 + stale-issue-message: > + This issue has been automatically marked as stale because it has not had recent activity. + It will be closed in two weeks if no further activity occurs. + Thank you for your contributions.
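+ # note: days-before-close is -1 above, so the bot only labels items as stale and never auto-closes them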
+ stale-pr-message: > + This pull request has been automatically marked as stale because it has not had + recent activity. It will be closed in two weeks if no further activity occurs. + Thank you for your contributions. + stale-pr-label: "stale" + stale-issue-label: "stale" + exempt-issue-labels: "keepalive,priority-high" + exempt-pr-labels: "keepalive,priority-high" diff --git a/README.md b/README.md index aeb94026ac50..7d9db7169b92 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![Release](https://img.shields.io/github/release/alluxio/alluxio/all.svg)](https://www.alluxio.io/download) [![Docker Pulls](https://img.shields.io/docker/pulls/alluxio/alluxio.svg)](https://hub.docker.com/r/alluxio/alluxio) [![Documentation](https://img.shields.io/badge/docs-reference-blue.svg)](https://www.alluxio.io/docs) +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/Alluxio/alluxio/badge)](https://api.securityscorecards.dev/projects/github.com/Alluxio/alluxio) [![Twitter Follow](https://img.shields.io/twitter/follow/alluxio.svg?label=Follow&style=social)](https://twitter.com/intent/follow?screen_name=alluxio) [![License](https://img.shields.io/github/license/alluxio/alluxio.svg)](https://github.com/Alluxio/alluxio/blob/master/LICENSE) diff --git a/bin/alluxio b/bin/alluxio index 0e2488219b85..280d4bbeb0df 100755 --- a/bin/alluxio +++ b/bin/alluxio @@ -21,6 +21,7 @@ function printUsage { echo -e " bootstrapConf \t Generate a config file if one doesn't exist" echo -e " fs \t Command line tool for interacting with the Alluxio filesystem." echo -e " fsadmin \t Command line tool for use by Alluxio filesystem admins." + echo -e " table \t (deprecated) Command line tool for interacting with the Alluxio table service." echo -e " getConf [key] \t Look up a configuration key, or print all configuration." echo -e " job \t Command line tool for interacting with the job service." echo -e " logLevel \t Set or get log level of Alluxio servers." @@ -294,6 +295,11 @@ function main { CLASSPATH=${ALLUXIO_CLIENT_CLASSPATH} runJavaClass "$@" ;; + "table") + CLASS="alluxio.cli.table.TableShell" + CLASSPATH=${ALLUXIO_CLIENT_CLASSPATH} + runJavaClass "$@" + ;; "logLevel") CLASS="alluxio.cli.LogLevel" CLASSPATH=${ALLUXIO_CLIENT_CLASSPATH} diff --git a/bin/alluxio-common.sh b/bin/alluxio-common.sh index 69023a43482f..d0b8f7926e33 100755 --- a/bin/alluxio-common.sh +++ b/bin/alluxio-common.sh @@ -68,3 +68,13 @@ function get_ramdisk_array() { done IFS=$oldifs } + +# Compose the ssh command according to the hostname +function ssh_command() { + local host=$1 + local command="" + if [[ $host != "localhost" && $host != "127.0.0.1" ]]; then + command="ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no -tt ${host}" + fi + echo "${command}" +} diff --git a/bin/alluxio-masters.sh b/bin/alluxio-masters.sh index 359c3daea348..73058d060733 100755 --- a/bin/alluxio-masters.sh +++ b/bin/alluxio-masters.sh @@ -12,12 +12,7 @@ set -o pipefail -LAUNCHER= -# If debugging is enabled propagate that through to sub-shells -if [[ "$-" == *x* ]]; then - LAUNCHER="bash -x" -fi -BIN=$(cd "$( dirname "$( readlink "$0" || echo "$0" )" )"; pwd) +. $(dirname "$0")/alluxio-common.sh USAGE="Usage: alluxio-masters.sh command..." 
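For reference, a rough sketch of what the new ssh_command helper in alluxio-common.sh evaluates to (hypothetical host names; an empty result means the caller runs the command in the local shell rather than over ssh):

# after sourcing alluxio-common.sh
ssh_command localhost    # prints nothing: the command runs locally
ssh_command 127.0.0.1    # prints nothing: the command runs locally
ssh_command worker-1     # prints: ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no -tt worker-1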
@@ -37,8 +32,21 @@ ALLUXIO_TASK_LOG="${ALLUXIO_LOGS_DIR}/task.log" echo "Executing the following command on all master nodes and logging to ${ALLUXIO_TASK_LOG}: $@" | tee -a ${ALLUXIO_TASK_LOG} +check_true() { + local output=$1 + if [[ $output == *"true"* ]]; then + result="true" + else + result="false" + fi + echo $result +} + N=0 -HA_ENABLED=$(${BIN}/alluxio getConf ${ALLUXIO_MASTER_JAVA_OPTS} alluxio.zookeeper.enabled) + +HA_ENABLED_GETCONF_RES=$(${BIN}/alluxio getConf ${ALLUXIO_MASTER_JAVA_OPTS} alluxio.zookeeper.enabled) +HA_ENABLED=$(check_true "$HA_ENABLED_GETCONF_RES") + JOURNAL_TYPE=$(${BIN}/alluxio getConf ${ALLUXIO_MASTER_JAVA_OPTS} alluxio.master.journal.type | awk '{print toupper($0)}') if [[ ${JOURNAL_TYPE} == "EMBEDDED" ]]; then HA_ENABLED="true" @@ -46,10 +54,10 @@ fi for master in ${HOSTLIST[@]}; do echo "[${master}] Connecting as ${USER}..." >> ${ALLUXIO_TASK_LOG} if [[ ${HA_ENABLED} == "true" || ${N} -eq 0 ]]; then - nohup ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no -tt ${master} ${LAUNCHER} \ + nohup $(ssh_command ${master}) ${LAUNCHER} \ $"${@// /\\ }" 2>&1 | while read line; do echo "[$(date '+%F %T')][${master}] ${line}"; done >> ${ALLUXIO_TASK_LOG} & else - nohup ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no -tt ${master} ${LAUNCHER} \ + nohup $(ssh_command ${master}) ${LAUNCHER} \ $"export ALLUXIO_MASTER_SECONDARY=true; ${@// /\\ }" 2>&1 | while read line; do echo "[$(date '+%F %T')][${master}] ${line}"; done >> ${ALLUXIO_TASK_LOG} & fi pids[${#pids[@]}]=$! diff --git a/bin/alluxio-monitor.sh b/bin/alluxio-monitor.sh index ed57a5578e18..5460a9d6e1d9 100755 --- a/bin/alluxio-monitor.sh +++ b/bin/alluxio-monitor.sh @@ -125,7 +125,7 @@ run_monitor() { else "${JAVA}" -cp ${CLASSPATH} ${alluxio_config} ${monitor_exec} if [[ $? -ne 0 ]]; then - echo -e "${WHITE}---${NC} ${RED}[ FAILED ]${NC} The ${CYAN}${node_type}${NC} @ ${PURPLE}$(hostname -f)${NC} is not serving requests.${NC}" + echo -e "${WHITE}---${NC} ${RED}[ FAILED ]${NC} The ${CYAN}${node_type}${NC} @ ${PURPLE}$(hostname -f)${NC} is not serving requests after 120s. Please check if the process is running and the logs/ if necessary.${NC}" print_node_logs "${node_type}" return 1 fi @@ -192,7 +192,8 @@ run_monitors() { # if there is an error, print the log tail for the remaining master nodes. batch_run_on_nodes "$(echo ${nodes})" "${BIN}/alluxio-monitor.sh" -L "${node_type}" else - HA_ENABLED=$(${BIN}/alluxio getConf ${ALLUXIO_MASTER_JAVA_OPTS} alluxio.zookeeper.enabled) + HA_ENABLED_GETCONF_RES=$(${BIN}/alluxio getConf ${ALLUXIO_MASTER_JAVA_OPTS} alluxio.zookeeper.enabled) + HA_ENABLED=$(check_true "$HA_ENABLED_GETCONF_RES") JOURNAL_TYPE=$(${BIN}/alluxio getConf ${ALLUXIO_MASTER_JAVA_OPTS} alluxio.master.journal.type | awk '{print toupper($0)}') if [[ ${JOURNAL_TYPE} == "EMBEDDED" ]]; then HA_ENABLED="true" @@ -206,6 +207,16 @@ run_monitors() { fi } +check_true() { + local output=$1 + if [[ $output == *"true"* ]]; then + result="true" + else + result="false" + fi + echo $result +} + # Used to run a command on multiple hosts concurrently. # By default it limits concurrent tasks to 100. batch_run_on_nodes() { diff --git a/bin/alluxio-workers.sh b/bin/alluxio-workers.sh index 11dc9c9558ba..79b792706db6 100755 --- a/bin/alluxio-workers.sh +++ b/bin/alluxio-workers.sh @@ -12,12 +12,7 @@ set -o pipefail -LAUNCHER= -# If debugging is enabled propagate that through to sub-shells -if [[ "$-" == *x* ]]; then - LAUNCHER="bash -x" -fi -BIN=$(cd "$( dirname "$( readlink "$0" || echo "$0" )" )"; pwd) +. 
$(dirname "$0")/alluxio-common.sh USAGE="Usage: alluxio-workers.sh command..." @@ -39,7 +34,7 @@ echo "Executing the following command on all worker nodes and logging to ${ALLUX for worker in ${HOSTLIST[@]}; do echo "[${worker}] Connecting as ${USER}..." >> ${ALLUXIO_TASK_LOG} - nohup ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no -tt ${worker} ${LAUNCHER} \ + nohup $(ssh_command ${worker}) ${LAUNCHER} \ $"${@// /\\ }" 2>&1 | while read line; do echo "[$(date '+%F %T')][${worker}] ${line}"; done >> ${ALLUXIO_TASK_LOG} & pids[${#pids[@]}]=$! done diff --git a/bin/launch-process b/bin/launch-process index cc9c8f75dd24..604915557c27 100755 --- a/bin/launch-process +++ b/bin/launch-process @@ -49,8 +49,9 @@ USAGE+=" contains() { if [[ "$1" = *"$2"* ]]; then printf "1" + else + printf "0" fi - printf "0" } # Sets environment variables by sourcing ${ALLUXIO_HOME}/libexec/alluxio-config.sh @@ -124,8 +125,9 @@ launch_master() { fi # use a default Xmx value for the master - local res="$(contains "${ALLUXIO_MASTER_JAVA_OPTS}" "Xmx")" - if [[ "${res}" -eq "0" ]]; then + local contain_xmx="$(contains "${ALLUXIO_MASTER_JAVA_OPTS}" "Xmx")" + local contain_max_percentage="$(contains "${ALLUXIO_MASTER_JAVA_OPTS}" "MaxRAMPercentage")" + if [[ "${contain_xmx}" -eq "0" ]] && [[ "${contain_max_percentage}" -eq "0" ]]; then ALLUXIO_MASTER_JAVA_OPTS+=" -Xmx8g " fi # use a default MetaspaceSize value for the master @@ -142,8 +144,9 @@ launch_master() { # Launch a secondary master process launch_secondary_master() { # use a default Xmx value for the master - local res="$(contains "${ALLUXIO_SECONDARY_MASTER_JAVA_OPTS}" "Xmx")" - if [[ "${res}" -eq "0" ]]; then + local contain_xmx="$(contains "${ALLUXIO_SECONDARY_MASTER_JAVA_OPTS}" "Xmx")" + local contain_max_percentage="$(contains "${ALLUXIO_SECONDARY_MASTER_JAVA_OPTS}" "MaxRAMPercentage")" + if [[ "${contain_xmx}" -eq "0" ]] && [[ "${contain_max_percentage}" -eq "0" ]]; then ALLUXIO_SECONDARY_MASTER_JAVA_OPTS+=" -Xmx8g " fi launch_process "${ALLUXIO_SECONDARY_MASTER_ATTACH_OPTS}" \ @@ -161,8 +164,9 @@ launch_job_master() { # Launch a worker process launch_worker() { # use a default Xmx value for the worker - local res="$(contains "${ALLUXIO_WORKER_JAVA_OPTS}" "Xmx")" - if [[ "${res}" -eq "0" ]]; then + local contain_xmx="$(contains "${ALLUXIO_WORKER_JAVA_OPTS}" "Xmx")" + local contain_max_percentage="$(contains "${ALLUXIO_WORKER_JAVA_OPTS}" "MaxRAMPercentage")" + if [[ "${contain_xmx}" -eq "0" ]] && [[ "${contain_max_percentage}" -eq "0" ]]; then ALLUXIO_WORKER_JAVA_OPTS+=" -Xmx4g " fi diff --git a/conf/log4j.properties b/conf/log4j.properties index ff3714669160..5140e0e62f7a 100644 --- a/conf/log4j.properties +++ b/conf/log4j.properties @@ -31,7 +31,7 @@ log4j.appender.=org.apache.log4j.varia.NullAppender log4j.appender.Console=org.apache.log4j.ConsoleAppender log4j.appender.Console.Target=System.out log4j.appender.Console.layout=org.apache.log4j.PatternLayout -log4j.appender.Console.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.Console.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # The ParquetWriter logs for every row group which is not noisy for large row group size, # but very noisy for small row group size. 
@@ -44,7 +44,7 @@ log4j.appender.JOB_MASTER_LOGGER.File=${alluxio.logs.dir}/job_master.log log4j.appender.JOB_MASTER_LOGGER.MaxFileSize=10MB log4j.appender.JOB_MASTER_LOGGER.MaxBackupIndex=100 log4j.appender.JOB_MASTER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.JOB_MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.JOB_MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Job Workers log4j.appender.JOB_WORKER_LOGGER=org.apache.log4j.RollingFileAppender @@ -52,7 +52,7 @@ log4j.appender.JOB_WORKER_LOGGER.File=${alluxio.logs.dir}/job_worker.log log4j.appender.JOB_WORKER_LOGGER.MaxFileSize=10MB log4j.appender.JOB_WORKER_LOGGER.MaxBackupIndex=100 log4j.appender.JOB_WORKER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.JOB_WORKER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.JOB_WORKER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Master log4j.appender.MASTER_LOGGER=org.apache.log4j.RollingFileAppender @@ -60,7 +60,7 @@ log4j.appender.MASTER_LOGGER.File=${alluxio.logs.dir}/master.log log4j.appender.MASTER_LOGGER.MaxFileSize=10MB log4j.appender.MASTER_LOGGER.MaxBackupIndex=100 log4j.appender.MASTER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Master log4j.appender.SECONDARY_MASTER_LOGGER=org.apache.log4j.RollingFileAppender @@ -68,7 +68,7 @@ log4j.appender.SECONDARY_MASTER_LOGGER.File=${alluxio.logs.dir}/secondary_master log4j.appender.SECONDARY_MASTER_LOGGER.MaxFileSize=10MB log4j.appender.SECONDARY_MASTER_LOGGER.MaxBackupIndex=100 log4j.appender.SECONDARY_MASTER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.SECONDARY_MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.SECONDARY_MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Master audit log4j.appender.MASTER_AUDIT_LOGGER=org.apache.log4j.RollingFileAppender @@ -76,7 +76,7 @@ log4j.appender.MASTER_AUDIT_LOGGER.File=${alluxio.logs.dir}/master_audit.log log4j.appender.MASTER_AUDIT_LOGGER.MaxFileSize=10MB log4j.appender.MASTER_AUDIT_LOGGER.MaxBackupIndex=100 log4j.appender.MASTER_AUDIT_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.MASTER_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.MASTER_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Job Master audit log4j.appender.JOB_MASTER_AUDIT_LOGGER=org.apache.log4j.RollingFileAppender @@ -84,7 +84,7 @@ log4j.appender.JOB_MASTER_AUDIT_LOGGER.File=${alluxio.logs.dir}/job_master_audit log4j.appender.JOB_MASTER_AUDIT_LOGGER.MaxFileSize=10MB log4j.appender.JOB_MASTER_AUDIT_LOGGER.MaxBackupIndex=100 log4j.appender.JOB_MASTER_AUDIT_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.JOB_MASTER_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.JOB_MASTER_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Proxy log4j.appender.PROXY_LOGGER=org.apache.log4j.RollingFileAppender @@ -92,7 +92,7 @@ log4j.appender.PROXY_LOGGER.File=${alluxio.logs.dir}/proxy.log log4j.appender.PROXY_LOGGER.MaxFileSize=10MB log4j.appender.PROXY_LOGGER.MaxBackupIndex=100 
log4j.appender.PROXY_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.PROXY_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.PROXY_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Proxy audit log4j.appender.PROXY_AUDIT_LOGGER=org.apache.log4j.RollingFileAppender @@ -100,7 +100,7 @@ log4j.appender.PROXY_AUDIT_LOGGER.File=${alluxio.logs.dir}/proxy_audit.log log4j.appender.PROXY_AUDIT_LOGGER.MaxFileSize=10MB log4j.appender.PROXY_AUDIT_LOGGER.MaxBackupIndex=100 log4j.appender.PROXY_AUDIT_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.PROXY_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M) - %m%n +log4j.appender.PROXY_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{2}[%t](%F:%M:%L) - %m%n # Appender for Workers log4j.appender.WORKER_LOGGER=org.apache.log4j.RollingFileAppender @@ -108,7 +108,7 @@ log4j.appender.WORKER_LOGGER.File=${alluxio.logs.dir}/worker.log log4j.appender.WORKER_LOGGER.MaxFileSize=10MB log4j.appender.WORKER_LOGGER.MaxBackupIndex=100 log4j.appender.WORKER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.WORKER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.WORKER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Remote appender for Job Master log4j.appender.REMOTE_JOB_MASTER_LOGGER=org.apache.log4j.net.SocketAppender @@ -170,7 +170,7 @@ log4j.appender.LOGSERVER_LOGGER.File=${alluxio.logs.dir}/logserver.log log4j.appender.LOGSERVER_LOGGER.MaxFileSize=10MB log4j.appender.LOGSERVER_LOGGER.MaxBackupIndex=100 log4j.appender.LOGSERVER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.LOGSERVER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.LOGSERVER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # (Local) appender for log server to log on behalf of log clients # No need to configure file path because log server will dynamically @@ -179,7 +179,7 @@ log4j.appender.LOGSERVER_CLIENT_LOGGER=org.apache.log4j.RollingFileAppender log4j.appender.LOGSERVER_CLIENT_LOGGER.MaxFileSize=10MB log4j.appender.LOGSERVER_CLIENT_LOGGER.MaxBackupIndex=100 log4j.appender.LOGSERVER_CLIENT_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.LOGSERVER_CLIENT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.LOGSERVER_CLIENT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for User log4j.appender.USER_LOGGER=org.apache.log4j.RollingFileAppender @@ -187,7 +187,7 @@ log4j.appender.USER_LOGGER.File=${alluxio.user.logs.dir}/user_${user.name}.log log4j.appender.USER_LOGGER.MaxFileSize=10MB log4j.appender.USER_LOGGER.MaxBackupIndex=10 log4j.appender.USER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.USER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.USER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Fuse log4j.appender.FUSE_LOGGER=org.apache.log4j.RollingFileAppender @@ -195,7 +195,7 @@ log4j.appender.FUSE_LOGGER.File=${alluxio.logs.dir}/fuse.log log4j.appender.FUSE_LOGGER.MaxFileSize=100MB log4j.appender.FUSE_LOGGER.MaxBackupIndex=10 log4j.appender.FUSE_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.FUSE_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.FUSE_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Disable noisy DEBUG logs 
log4j.logger.com.amazonaws.util.EC2MetadataUtils=OFF diff --git a/core/client/fs/src/main/java/alluxio/client/CanUnbuffer.java b/core/client/fs/src/main/java/alluxio/client/CanUnbuffer.java new file mode 100644 index 000000000000..c9da5ecda937 --- /dev/null +++ b/core/client/fs/src/main/java/alluxio/client/CanUnbuffer.java @@ -0,0 +1,23 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client; + +/** + * Indicates that an InputStream can clear its buffers on request. + */ +public interface CanUnbuffer { + /** + * Reduce the buffering. This will also free sockets and file descriptors held by the stream, + * if possible. + */ + void unbuffer(); +} diff --git a/core/client/fs/src/main/java/alluxio/client/block/AllMastersWorkerInfo.java b/core/client/fs/src/main/java/alluxio/client/block/AllMastersWorkerInfo.java new file mode 100644 index 000000000000..d858f29276eb --- /dev/null +++ b/core/client/fs/src/main/java/alluxio/client/block/AllMastersWorkerInfo.java @@ -0,0 +1,88 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.block; + +import alluxio.collections.Pair; +import alluxio.wire.WorkerInfo; + +import java.net.InetSocketAddress; +import java.util.List; +import java.util.Map; + +/** + * A data class to persist aggregated worker info from all masters, including standby masters. + * Used when the worker all-master registration feature is enabled.
+ */ +public class AllMastersWorkerInfo { + private final Map<Long, InetSocketAddress> mWorkerIdAddressMap; + private final List<InetSocketAddress> mMasterAddresses; + private final InetSocketAddress mPrimaryMasterAddress; + private final List<WorkerInfo> mPrimaryMasterWorkerInfo; + private final Map<Long, List<Pair<InetSocketAddress, WorkerInfo>>> mWorkerIdInfoMap; + + /** + * @param workerIdAddressMap the worker id to address map + * @param masterAddresses the master addresses + * @param primaryMasterAddress the primary master address + * @param primaryMasterWorkerInfo the worker info of the primary master + * @param workerIdInfoMap the worker id to worker info map + */ + public AllMastersWorkerInfo( + Map<Long, InetSocketAddress> workerIdAddressMap, + List<InetSocketAddress> masterAddresses, + InetSocketAddress primaryMasterAddress, + List<WorkerInfo> primaryMasterWorkerInfo, + Map<Long, List<Pair<InetSocketAddress, WorkerInfo>>> workerIdInfoMap) { + mWorkerIdAddressMap = workerIdAddressMap; + mMasterAddresses = masterAddresses; + mPrimaryMasterAddress = primaryMasterAddress; + mPrimaryMasterWorkerInfo = primaryMasterWorkerInfo; + mWorkerIdInfoMap = workerIdInfoMap; + } + + /** + * @return the worker id to worker address map + */ + public Map<Long, InetSocketAddress> getWorkerIdAddressMap() { + return mWorkerIdAddressMap; + } + + /** + * @return the master addresses for all masters + */ + public List<InetSocketAddress> getMasterAddresses() { + return mMasterAddresses; + } + + /** + * @return the primary master address + */ + public InetSocketAddress getPrimaryMasterAddress() { + return mPrimaryMasterAddress; + } + + /** + * @return the worker info for all workers from the primary master + */ + public List<WorkerInfo> getPrimaryMasterWorkerInfo() { + return mPrimaryMasterWorkerInfo; + } + + /** + * @return a map whose keys are worker ids and whose values are lists of pairs; + * the first element of each pair is a master address and the second element is + * the worker info for that worker id as reported by the master at that address + */ + public Map<Long, List<Pair<InetSocketAddress, WorkerInfo>>> getWorkerIdInfoMap() { + return mWorkerIdInfoMap; + } +} diff --git a/core/client/fs/src/main/java/alluxio/client/block/BlockMasterClient.java b/core/client/fs/src/main/java/alluxio/client/block/BlockMasterClient.java index bcc5864d71bc..12649a817187 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/BlockMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/block/BlockMasterClient.java @@ -13,6 +13,9 @@ import alluxio.Client; import alluxio.client.block.options.GetWorkerReportOptions; +import alluxio.exception.status.AlluxioStatusException; +import alluxio.grpc.DecommissionWorkerPOptions; +import alluxio.grpc.RemoveDisabledWorkerPOptions; import alluxio.grpc.WorkerLostStorageInfo; import alluxio.master.MasterClientContext; import alluxio.wire.BlockInfo; @@ -57,11 +60,11 @@ public static BlockMasterClient create(MasterClientContext conf) { List<WorkerInfo> getWorkerInfoList() throws IOException; /** - * Remove the metadata of a decommissioned worker. + * Re-enable a disabled worker so that it can register to the cluster. * - * @param workerName contains a string, representing the workerName + * @param options contains the info used to find the target worker(s) */ - void removeDecommissionedWorker(String workerName) throws IOException; + void removeDisabledWorker(RemoveDisabledWorkerPOptions options) throws IOException; /** * Gets the worker information of selected workers and selected fields for report CLI. @@ -104,4 +107,11 @@ List<WorkerInfo> getWorkerReport(final GetWorkerReportOptions options) * @return amount of used space in bytes */ long getUsedBytes() throws IOException; + + /** + * Decommission a worker.
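+ * The target worker is identified by the workerHostname and workerWebPort fields of the options.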
+ * @param options method options + * @throws AlluxioStatusException if something goes wrong + */ + void decommissionWorker(DecommissionWorkerPOptions options) throws IOException; } diff --git a/core/client/fs/src/main/java/alluxio/client/block/BlockStoreClient.java b/core/client/fs/src/main/java/alluxio/client/block/BlockStoreClient.java index 337cc64e1ccf..65221df05e2a 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/BlockStoreClient.java +++ b/core/client/fs/src/main/java/alluxio/client/block/BlockStoreClient.java @@ -125,7 +125,7 @@ public BlockInStream getInStream(long blockId, InStreamOptions options) throws I * * @param blockId the id of the block to read * @param options the options associated with the read request - * @param failedWorkers the map of workers addresses to most recent failure time + * @param failedWorkers the map of workers' addresses to most recent failure time * @return a stream which reads from the beginning of the block */ public BlockInStream getInStream(long blockId, InStreamOptions options, @@ -140,7 +140,7 @@ public BlockInStream getInStream(long blockId, InStreamOptions options, * * @param info the block info * @param options the options associated with the read request - * @param failedWorkers the map of workers addresses to most recent failure time + * @param failedWorkers the map of workers' addresses to most recent failure time * @return a stream which reads from the beginning of the block */ public BlockInStream getInStream(BlockInfo info, InStreamOptions options, @@ -169,7 +169,7 @@ public BlockInStream getInStream(BlockInfo info, InStreamOptions options, * @param info the info of the block to read * @param status the URIStatus associated with the read request * @param policy the policy determining the Alluxio worker location - * @param failedWorkers the map of workers addresses to most recent failure time + * @param failedWorkers the map of workers' addresses to most recent failure time * @return the data source and type of data source of the block */ public Pair<WorkerNetAddress, BlockInStreamSource> getDataSourceAndType(BlockInfo info, diff --git a/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java b/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java index 2824c36dcb63..7c1652156d91 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java @@ -15,6 +15,7 @@ import alluxio.Constants; import alluxio.client.block.options.GetWorkerReportOptions; import alluxio.grpc.BlockMasterClientServiceGrpc; +import alluxio.grpc.DecommissionWorkerPOptions; import alluxio.grpc.GetBlockInfoPRequest; import alluxio.grpc.GetBlockMasterInfoPOptions; import alluxio.grpc.GetCapacityBytesPOptions; @@ -22,10 +23,12 @@ import alluxio.grpc.GetWorkerInfoListPOptions; import alluxio.grpc.GetWorkerLostStoragePOptions; import alluxio.grpc.GrpcUtils; -import alluxio.grpc.RemoveDecommissionedWorkerPOptions; +import alluxio.grpc.RemoveDisabledWorkerPOptions; import alluxio.grpc.ServiceType; import alluxio.grpc.WorkerLostStorageInfo; import alluxio.master.MasterClientContext; +import alluxio.master.selectionpolicy.MasterSelectionPolicy; +import alluxio.retry.RetryPolicy; import alluxio.wire.BlockInfo; import alluxio.wire.BlockMasterInfo; import alluxio.wire.BlockMasterInfo.BlockMasterInfoField; @@ -35,9 +38,11 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.net.InetSocketAddress; import
import java.util.ArrayList; import java.util.List; import java.util.Set; +import java.util.function.Supplier; import java.util.stream.Collectors; import javax.annotation.concurrent.ThreadSafe; @@ -59,6 +64,29 @@ public RetryHandlingBlockMasterClient(MasterClientContext conf) { super(conf); } + /** + * Creates a new block master client. + * + * @param conf master client configuration + * @param address the master address the client connects to + */ + public RetryHandlingBlockMasterClient(MasterClientContext conf, InetSocketAddress address) { + super(conf, MasterSelectionPolicy.Factory.specifiedMaster(address)); + } + + /** + * Creates a new block master client. + * + * @param conf master client configuration + * @param address the master address the client connects to + * @param retryPolicy retry policy to use + */ + public RetryHandlingBlockMasterClient( + MasterClientContext conf, InetSocketAddress address, + Supplier retryPolicy) { + super(conf, MasterSelectionPolicy.Factory.specifiedMaster(address), retryPolicy); + } + @Override protected ServiceType getRemoteServiceType() { return ServiceType.BLOCK_MASTER_CLIENT_SERVICE; @@ -93,10 +121,9 @@ public List getWorkerInfoList() throws IOException { } @Override - public void removeDecommissionedWorker(String workerName) throws IOException { - retryRPC(() -> mClient.removeDecommissionedWorker(RemoveDecommissionedWorkerPOptions - .newBuilder().setWorkerName(workerName).build()), - RPC_LOG, "RemoveDecommissionedWorker", ""); + public void removeDisabledWorker(RemoveDisabledWorkerPOptions options) throws IOException { + retryRPC(() -> mClient.removeDisabledWorker(options), + RPC_LOG, "RemoveDisabledWorker", ""); } @Override @@ -150,4 +177,11 @@ public long getUsedBytes() throws IOException { () -> mClient.getUsedBytes(GetUsedBytesPOptions.getDefaultInstance()).getBytes(), RPC_LOG, "GetUsedBytes", ""); } + + @Override + public void decommissionWorker(DecommissionWorkerPOptions options) throws IOException { + retryRPC(() -> mClient.decommissionWorker(options), + RPC_LOG, "DecommissionWorker", "workerHostName=%s,workerWebPort=%s,options=%s", + options.getWorkerHostname(), options.getWorkerWebPort(), options); + } } diff --git a/core/client/fs/src/main/java/alluxio/client/block/options/GetWorkerReportOptions.java b/core/client/fs/src/main/java/alluxio/client/block/options/GetWorkerReportOptions.java index 12098cd96d10..3e63692dff85 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/options/GetWorkerReportOptions.java +++ b/core/client/fs/src/main/java/alluxio/client/block/options/GetWorkerReportOptions.java @@ -196,7 +196,8 @@ public enum WorkerInfoField { WORKER_USED_BYTES, WORKER_USED_BYTES_ON_TIERS, BLOCK_COUNT, - BUILD_VERSION; + BUILD_VERSION, + NUM_VCPU; public static final Set ALL = EnumSet.allOf(WorkerInfoField.class); diff --git a/core/client/fs/src/main/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicy.java b/core/client/fs/src/main/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicy.java new file mode 100644 index 000000000000..936ed11770bb --- /dev/null +++ b/core/client/fs/src/main/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicy.java @@ -0,0 +1,136 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.block.policy; + +import alluxio.client.block.BlockWorkerInfo; +import alluxio.client.block.policy.options.GetWorkerOptions; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.wire.WorkerNetAddress; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.Streams; +import org.apache.commons.codec.digest.MurmurHash3; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.TreeMap; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicLong; + +/** + * A policy that pseudo-randomly distributes blocks between workers according to their capacity, + * so that the probability a worker is chosen is equal to the ratio of its capacity over total + * capacity of all workers, provided that the blocks requested follow a uniform distribution. + * If sharding is disabled, the same block is always assigned to the same worker. If sharding + * is enabled, the block is assigned to a fixed set of workers. + * + * The target worker is determined by the following algorithm: + * 1. build a cumulative distribution function by adding up all workers and their capacities. + * workers are sorted by their host name alphabetically. + * if worker A has 90 GB, B has 10 GB and C has 900 GB, the CDF looks like + * | 0 ... 90 | 90 ... 100 | 100 ... 1000 | + * | worker A | worker B | worker C | + * 2. find a fixed starting point in [0, totalCapacity) determined by the hashed block id. + * | 0 ... 90 | 90 ... 100 | 100 ... 1000 | + * | worker A | worker B | worker C | + * ^ start = 95 + * 3. find the corresponding worker in the CDF. + * which is worker B in this example + * 4. if #shards = 1, this worker is selected. otherwise, find a set of candidates: + * 4.1 hashed_block_id(0) = block id + * 4.2 for i in [1, #shards], hashed_block_id(i) = hash(hashed_block_id(i-1)) + * 4.3 find the worker whose position corresponds to hashed_block_id(i) in the CDF, + * and add it to the candidates set + * 4.4 repeat 4.2 - 4.3 + * 5. select a random worker in the candidate set + * + * The difference between this policy and {@link CapacityBaseRandomPolicy} is that this policy + * uses the hashed block ID as the index to choose the target worker, so that the same block is + * always routed to the same set of workers. + * + * Both this policy and {@link DeterministicHashPolicy} choose workers based on the hashed block + * ID. The difference is that {@link DeterministicHashPolicy} uniformly distributes the blocks + * among the configured number of shards, while this policy chooses workers based on a + * distribution of their normalized capacity. + * + * @see CapacityBaseRandomPolicy + * @see DeterministicHashPolicy + */ +public class CapacityBasedDeterministicHashPolicy implements BlockLocationPolicy { + private final int mShards; + + /** + * Constructor required by + * {@link BlockLocationPolicy.Factory#create(Class, AlluxioConfiguration)}.
+ * @param conf Alluxio configuration + */ + public CapacityBasedDeterministicHashPolicy(AlluxioConfiguration conf) { + int numShards = + conf.getInt(PropertyKey.USER_UFS_BLOCK_READ_LOCATION_POLICY_DETERMINISTIC_HASH_SHARDS); + Preconditions.checkArgument(numShards >= 1, "number of shards must be no less than 1"); + mShards = numShards; + } + + @Override + public Optional<WorkerNetAddress> getWorker(GetWorkerOptions options) { + TreeMap<Long, BlockWorkerInfo> capacityCdf = new TreeMap<>(); + AtomicLong totalCapacity = new AtomicLong(0); + Streams.stream(options.getBlockWorkerInfos()) + .filter(workerInfo -> workerInfo.getCapacityBytes() >= options.getBlockInfo().getLength()) + // sort by hostname to guarantee two workers with the same capacity have a defined order + .sorted(Comparator.comparing(w -> w.getNetAddress().getHost())) + .forEach(workerInfo -> { + capacityCdf.put(totalCapacity.get(), workerInfo); + totalCapacity.getAndAdd(workerInfo.getCapacityBytes()); + }); + if (totalCapacity.get() == 0 || capacityCdf.isEmpty()) { + return Optional.empty(); + } + long blockId = options.getBlockInfo().getBlockId(); + BlockWorkerInfo chosenWorker = pickWorker(capacityCdf, blockId, totalCapacity.get()); + return Optional.of(chosenWorker.getNetAddress()); + } + + private BlockWorkerInfo pickWorker(TreeMap<Long, BlockWorkerInfo> capacityCdf, + long blockId, long totalCapacity) { + if (mShards == 1) { + // if no sharding, simply return the worker corresponding to the start point + long startPoint = Math.abs(hashBlockId(blockId)) % totalCapacity; + return capacityCdf.floorEntry(startPoint).getValue(); + } + long hashedBlockId = blockId; + List<BlockWorkerInfo> candidates = new ArrayList<>(); + for (int i = 1; i <= Math.min(mShards, capacityCdf.size()); i++) { + hashedBlockId = hashBlockId(hashedBlockId); + BlockWorkerInfo candidate = capacityCdf + .floorEntry(Math.abs(hashedBlockId) % totalCapacity) // non-null as capacities >= 0 + .getValue(); + candidates.add(candidate); + } + return getRandomCandidate(candidates); + } + + @VisibleForTesting + protected long hashBlockId(long blockId) { + return MurmurHash3.hash64(blockId); + } + + @VisibleForTesting + protected BlockWorkerInfo getRandomCandidate(List<BlockWorkerInfo> candidates) { + int randomIndex = ThreadLocalRandom.current().nextInt(candidates.size()); + return candidates.get(randomIndex); + } +} diff --git a/core/client/fs/src/main/java/alluxio/client/block/policy/SpecificHostPolicy.java b/core/client/fs/src/main/java/alluxio/client/block/policy/SpecificHostPolicy.java index befb3248142b..30224aaf1c1e 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/policy/SpecificHostPolicy.java +++ b/core/client/fs/src/main/java/alluxio/client/block/policy/SpecificHostPolicy.java @@ -22,6 +22,7 @@ import com.google.common.base.Preconditions; import java.util.Optional; +import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; /** @@ -31,6 +32,8 @@ @ThreadSafe public final class SpecificHostPolicy implements BlockLocationPolicy { private final String mHostname; + @Nullable + private final Integer mRpcPort; /** * Constructs a new {@link SpecificHostPolicy} * * @param conf Alluxio configuration */ public SpecificHostPolicy(AlluxioConfiguration conf) { - this(conf.getString(PropertyKey.WORKER_HOSTNAME)); + this(conf.getString(PropertyKey.WORKER_HOSTNAME), conf.getInt(PropertyKey.WORKER_RPC_PORT)); } /** @@ -48,7 +51,18 @@ public SpecificHostPolicy(AlluxioConfiguration conf) { * @param hostname the name of the host */ public
SpecificHostPolicy(String hostname) { + this(hostname, null); + } + + /** + * Constructs the policy with the hostname and port. + * + * @param hostname the name of the host + * @param rpcPort the rpc port + */ + public SpecificHostPolicy(String hostname, @Nullable Integer rpcPort) { mHostname = Preconditions.checkNotNull(hostname, "hostname"); + mRpcPort = rpcPort; } /** @@ -59,7 +73,8 @@ public SpecificHostPolicy(String hostname) { public Optional<WorkerNetAddress> getWorker(GetWorkerOptions options) { // find the first worker matching the host name for (BlockWorkerInfo info : options.getBlockWorkerInfos()) { - if (info.getNetAddress().getHost().equals(mHostname)) { + if (info.getNetAddress().getHost().equals(mHostname) + && (mRpcPort == null || info.getNetAddress().getRpcPort() == mRpcPort)) { return Optional.of(info.getNetAddress()); } } diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/BlockInStream.java b/core/client/fs/src/main/java/alluxio/client/block/stream/BlockInStream.java index 042662935f28..e6652444987b 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/BlockInStream.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/BlockInStream.java @@ -13,6 +13,7 @@ import alluxio.Seekable; import alluxio.client.BoundedStream; +import alluxio.client.CanUnbuffer; import alluxio.client.PositionedReadable; import alluxio.client.ReadType; import alluxio.client.file.FileSystemContext; @@ -25,6 +26,7 @@ import alluxio.grpc.ReadRequest; import alluxio.network.protocol.databuffer.DataBuffer; import alluxio.proto.dataserver.Protocol; +import alluxio.util.LogUtils; import alluxio.util.io.BufferUtils; import alluxio.util.network.NettyUtils; import alluxio.util.network.NetworkAddressUtils; @@ -48,7 +50,7 @@ */ @NotThreadSafe public class BlockInStream extends InputStream implements BoundedStream, Seekable, - PositionedReadable { + PositionedReadable, CanUnbuffer { private static final Logger LOG = LoggerFactory.getLogger(BlockInStream.class); /** the source tracking where the block is from. */ @@ -70,9 +72,9 @@ public enum BlockInStreamSource { /** Current position of the stream, relative to the start of the block. */ private long mPos = 0; /** The current data chunk. */ - private DataBuffer mCurrentChunk; + protected DataBuffer mCurrentChunk; - private DataReader mDataReader; + protected DataReader mDataReader; private final DataReader.Factory mDataReaderFactory; private boolean mClosed = false; @@ -510,6 +512,15 @@ private void closeDataReader() throws IOException { mDataReader = null; } + @Override + public void unbuffer() { + try { + closeDataReader(); + } catch (IOException e) { + LogUtils.warnWithException(LOG, "failed to unbuffer the block stream", e); + } + } + /** * Convenience method to ensure the stream is not closed.
*/ diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/BlockWorkerDataWriter.java b/core/client/fs/src/main/java/alluxio/client/block/stream/BlockWorkerDataWriter.java index cf3930efb883..ea1302e604bd 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/BlockWorkerDataWriter.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/BlockWorkerDataWriter.java @@ -27,6 +27,7 @@ import io.netty.buffer.ByteBuf; import java.io.IOException; +import java.util.Optional; import javax.annotation.concurrent.NotThreadSafe; /** @@ -86,6 +87,11 @@ public int chunkSize() { return mChunkSize; } + @Override + public Optional<String> getUfsContentHash() { + return Optional.empty(); + } + @Override public void writeChunk(final ByteBuf buf) throws IOException { try { diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/DataWriter.java b/core/client/fs/src/main/java/alluxio/client/block/stream/DataWriter.java index 90541ebf6e2f..0f7f9665957a 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/DataWriter.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/DataWriter.java @@ -29,6 +29,7 @@ import java.io.Closeable; import java.io.IOException; +import java.util.Optional; import javax.annotation.concurrent.ThreadSafe; /** @@ -97,6 +98,12 @@ public static DataWriter create(FileSystemContext context, long blockId, long bl } } + /** + * @return the content hash of the file if it is written to the UFS. Will only + * return a non-empty value after the data writer has been closed. + */ + Optional<String> getUfsContentHash(); + /** * Writes a chunk. This method takes the ownership of this chunk even if it fails to write * the chunk. diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcBlockingStream.java b/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcBlockingStream.java index 3a6c5a353419..b7babff520a3 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcBlockingStream.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcBlockingStream.java @@ -28,6 +28,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.Optional; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.TimeUnit; @@ -243,14 +244,20 @@ public void cancel() { * Wait for server to complete the inbound stream. * * @param timeoutMs maximum time to wait for server response + * @return the last response of the stream */ - public void waitForComplete(long timeoutMs) throws IOException { + public Optional<ResT> waitForComplete(long timeoutMs) throws IOException { if (mCompleted || mCanceled) { - return; + return Optional.empty(); } - while (receive(timeoutMs) != null) { + ResT prevResponse; + ResT response = null; + do { // wait until inbound stream is closed from server.
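+ // keep the most recent non-null response so it can be returned once the stream closes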
- } + prevResponse = response; + response = receive(timeoutMs); + } while (response != null); + return Optional.ofNullable(prevResponse); } /** diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataMessageBlockingStream.java b/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataMessageBlockingStream.java index a07cc3614fdd..b9ba0c37110f 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataMessageBlockingStream.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataMessageBlockingStream.java @@ -20,6 +20,7 @@ import io.grpc.stub.StreamObserver; import java.io.IOException; +import java.util.Optional; import java.util.function.Function; import javax.annotation.concurrent.NotThreadSafe; @@ -104,17 +105,21 @@ public void sendDataMessage(DataMessage<ReqT, DataBuffer> message, long timeoutM } @Override - public void waitForComplete(long timeoutMs) throws IOException { + public Optional<ResT> waitForComplete(long timeoutMs) throws IOException { if (mResponseMarshaller == null) { - super.waitForComplete(timeoutMs); - return; + return super.waitForComplete(timeoutMs); } + // loop until the last response is received, whose result will be returned DataMessage<ResT, DataBuffer> message; + DataMessage<ResT, DataBuffer> prevMessage = null; while (!isCanceled() && (message = receiveDataMessage(timeoutMs)) != null) { - if (message.getBuffer() != null) { - message.getBuffer().release(); + if (prevMessage != null && prevMessage.getBuffer() != null) { + prevMessage.getBuffer().release(); } + prevMessage = message; } - super.waitForComplete(timeoutMs); + // note that the combineData call is responsible for releasing the buffer of prevMessage + ResT result = mResponseMarshaller.combineData(prevMessage); + return Optional.ofNullable(super.waitForComplete(timeoutMs).orElse(result)); } } diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataWriter.java b/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataWriter.java index 9b57d946ea0b..bfaffc62a6c5 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataWriter.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataWriter.java @@ -36,6 +36,7 @@ import io.netty.buffer.ByteBuf; import java.io.IOException; +import java.util.Optional; import javax.annotation.concurrent.NotThreadSafe; /** @@ -68,6 +69,9 @@ public final class GrpcDataWriter implements DataWriter { private final long mChunkSize; private final GrpcBlockingStream<WriteRequest, WriteResponse> mStream; + /** The content hash resulting from the write operation if one is available. */ + private String mContentHash = null; + /** * The next pos to queue to the buffer. */ @@ -177,6 +181,11 @@ public long pos() { return mPosToQueue; } + @Override + public Optional<String> getUfsContentHash() { + return Optional.ofNullable(mContentHash); + } + @Override public void writeChunk(final ByteBuf buf) throws IOException { mPosToQueue += buf.readableBytes(); @@ -239,6 +248,9 @@ public void flush() throws IOException { writeRequest, mAddress)); } posWritten = response.getOffset(); + if (response.hasContentHash()) { + mContentHash = response.getContentHash(); + } } while (mPosToQueue != posWritten); } @@ -249,7 +261,9 @@ public void close() throws IOException { return; } mStream.close(); - mStream.waitForComplete(mWriterCloseTimeoutMs); + mStream.waitForComplete(mWriterCloseTimeoutMs) + .ifPresent(writeResponse -> mContentHash = writeResponse.hasContentHash() + ?
writeResponse.getContentHash() : null); } finally { mClient.close(); } diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/LocalFileDataWriter.java b/core/client/fs/src/main/java/alluxio/client/block/stream/LocalFileDataWriter.java index b3cd3778350c..03b37e0b9760 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/LocalFileDataWriter.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/LocalFileDataWriter.java @@ -31,6 +31,7 @@ import io.netty.buffer.ByteBuf; import java.io.IOException; +import java.util.Optional; import javax.annotation.concurrent.NotThreadSafe; /** @@ -119,6 +120,11 @@ public int chunkSize() { return (int) mChunkSize; } + @Override + public Optional<String> getUfsContentHash() { + return Optional.empty(); + } + @Override public void writeChunk(final ByteBuf buf) throws IOException { try { diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/UfsFallbackLocalFileDataWriter.java b/core/client/fs/src/main/java/alluxio/client/block/stream/UfsFallbackLocalFileDataWriter.java index b534ea28f326..d4140e85846e 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/UfsFallbackLocalFileDataWriter.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/UfsFallbackLocalFileDataWriter.java @@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.Optional; import javax.annotation.concurrent.NotThreadSafe; /** @@ -82,6 +83,11 @@ public static UfsFallbackLocalFileDataWriter create(FileSystemContext context, mIsWritingToLocal = mLocalFileDataWriter != null; } + @Override + public Optional<String> getUfsContentHash() { + return mGrpcDataWriter.getUfsContentHash(); + } + @Override public void writeChunk(ByteBuf chunk) throws IOException { if (mIsWritingToLocal) { diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/UnderFileSystemFileOutStream.java b/core/client/fs/src/main/java/alluxio/client/block/stream/UnderFileSystemFileOutStream.java index fd0066ab3786..40df073e81d2 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/UnderFileSystemFileOutStream.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/UnderFileSystemFileOutStream.java @@ -30,6 +30,7 @@ @NotThreadSafe public class UnderFileSystemFileOutStream extends BlockOutStream { private static final int ID_UNUSED = -1; + private final DataWriter mDataWriter; /** * Creates an instance of {@link UnderFileSystemFileOutStream} that writes to a UFS file. @@ -52,6 +53,14 @@ public static UnderFileSystemFileOutStream create(FileSystemContext context, */ protected UnderFileSystemFileOutStream(DataWriter dataWriter, WorkerNetAddress address) { super(dataWriter, Long.MAX_VALUE, address); + mDataWriter = dataWriter; + } + + /** + * @return the data writer for the stream + */ + public DataWriter getDataWriter() { + return mDataWriter; + } @Override diff --git a/core/client/fs/src/main/java/alluxio/client/block/util/WorkerInfoUtil.java b/core/client/fs/src/main/java/alluxio/client/block/util/WorkerInfoUtil.java new file mode 100644 index 000000000000..c8e489d2bc1c --- /dev/null +++ b/core/client/fs/src/main/java/alluxio/client/block/util/WorkerInfoUtil.java @@ -0,0 +1,146 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License").
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.block.util; + +import alluxio.ClientContext; +import alluxio.annotation.SuppressFBWarnings; +import alluxio.client.block.AllMastersWorkerInfo; +import alluxio.client.block.BlockMasterClient; +import alluxio.client.block.RetryHandlingBlockMasterClient; +import alluxio.client.block.options.GetWorkerReportOptions; +import alluxio.collections.Pair; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.master.MasterClientContext; +import alluxio.retry.TimeoutRetry; +import alluxio.util.ConfigurationUtils; +import alluxio.wire.WorkerInfo; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Utility class for getting worker info. + */ +public class WorkerInfoUtil { + private static final Logger LOG = LoggerFactory.getLogger(WorkerInfoUtil.class); + private static final int RETRY_TIMEOUT = 5000; + private static final int RETRY_INTERVAL = 500; + + /** + * Get worker reports from all masters, including standby masters. + * Can only be called when the worker all-master registration feature is enabled. + * + * @param configuration the cluster configuration + * @param primaryMasterClient the block master client connecting to the primary + * @param options the options to make the GetWorkerReport rpc + * @return the aggregated worker info + */ + public static AllMastersWorkerInfo getWorkerReportsFromAllMasters( + AlluxioConfiguration configuration, + BlockMasterClient primaryMasterClient, + GetWorkerReportOptions options) throws IOException { + Preconditions.checkState( + configuration.getBoolean(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS), + "GetWorkerReportsFromAllMasters is used to collect worker info from " + "all masters, including standby masters.
" + + "This method requires worker all master registration to be enabled."); + + Preconditions.checkState( + options.getFieldRange().contains(GetWorkerReportOptions.WorkerInfoField.ID)); + Preconditions.checkState( + options.getFieldRange().contains(GetWorkerReportOptions.WorkerInfoField.STATE)); + Preconditions.checkState( + options.getFieldRange().contains(GetWorkerReportOptions.WorkerInfoField.ADDRESS)); + + ClientContext clientContext = ClientContext.create(Configuration.global()); + MasterClientContext masterContext = MasterClientContext.newBuilder(clientContext).build(); + + Preconditions.checkState( + primaryMasterClient.getRemoteSockAddress() instanceof InetSocketAddress); + InetSocketAddress primaryMasterAddress = + (InetSocketAddress) primaryMasterClient.getRemoteSockAddress(); + List masterAddresses = + ConfigurationUtils.getMasterRpcAddresses(configuration); + Preconditions.checkState(masterAddresses.contains(primaryMasterAddress)); + + Map> masterAddressToWorkerInfoMap = new HashMap<>(); + for (InetSocketAddress masterAddress : masterAddresses) { + try (BlockMasterClient client = new RetryHandlingBlockMasterClient( + masterContext, masterAddress, () -> new TimeoutRetry(RETRY_TIMEOUT, RETRY_INTERVAL))) { + List workerInfos = client.getWorkerReport(options); + masterAddressToWorkerInfoMap.put(masterAddress, workerInfos); + } catch (Exception e) { + if (masterAddress.equals(primaryMasterAddress)) { + LOG.error("Failed to get worker report from master: {}", masterContext, e); + throw e; + } + LOG.warn("Failed to get worker report from master: {}", masterContext, e); + } + } + return populateAllMastersWorkerInfo(primaryMasterAddress, masterAddressToWorkerInfoMap); + } + + @VisibleForTesting + @SuppressFBWarnings("WMI_WRONG_MAP_ITERATOR") + static AllMastersWorkerInfo populateAllMastersWorkerInfo( + InetSocketAddress primaryMasterAddress, + Map> masterAddressToWorkerInfoMap) { + Map>> workerIdInfoMap = new HashMap<>(); + Map workerIdAddressMap = new HashMap<>(); + List workerInfosFromPrimaryMaster = null; + + for (InetSocketAddress masterAddress : masterAddressToWorkerInfoMap.keySet()) { + List workerInfo = masterAddressToWorkerInfoMap.get(masterAddress); + if (masterAddress.equals(primaryMasterAddress)) { + workerInfosFromPrimaryMaster = workerInfo; + } + for (WorkerInfo info : workerInfo) { + workerIdInfoMap.compute(info.getId(), (k, v) -> { + if (v == null) { + v = new ArrayList<>(); + } + v.add(new Pair<>(masterAddress, info)); + return v; + }); + workerIdAddressMap.compute(info.getId(), (k, v) -> { + InetSocketAddress workerAddress = + InetSocketAddress.createUnresolved(info.getAddress().getHost(), + info.getAddress().getRpcPort()); + if (v == null) { + return workerAddress; + } + if (!v.equals(workerAddress)) { + throw new RuntimeException(String.format( + "The same worker id %d corresponds to multiple worker name %s %s", + k, v, workerAddress)); + } + return v; + }); + } + } + return new AllMastersWorkerInfo(workerIdAddressMap, + new ArrayList<>(masterAddressToWorkerInfoMap.keySet()), + primaryMasterAddress, + workerInfosFromPrimaryMaster, workerIdInfoMap); + } +} diff --git a/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java b/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java index f2a7f63eb7f6..83769689b75b 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java +++ b/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java @@ -48,7 +48,9 @@ import java.nio.ByteBuffer; import 
java.time.Duration; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Objects; import javax.annotation.concurrent.NotThreadSafe; /** @@ -388,6 +390,7 @@ private void updateStream() throws IOException { throw new IOException("No BlockInfo for block(id=" + blockId + ") of file" + "(id=" + mStatus.getFileId() + ", path=" + mStatus.getPath() + ")"); } + // Create stream boolean isBlockInfoOutdated = true; // blockInfo is "outdated" when all the locations in that blockInfo are failed workers, @@ -395,7 +398,8 @@ private void updateStream() throws IOException { if (mFailedWorkers.isEmpty() || mFailedWorkers.size() < blockInfo.getLocations().size()) { isBlockInfoOutdated = false; } else { - for (BlockLocation location : blockInfo.getLocations()) { + List locs = blockInfo.getLocations(); + for (BlockLocation location : locs) { if (!mFailedWorkers.containsKey(location.getWorkerAddress())) { isBlockInfoOutdated = false; break; @@ -420,6 +424,9 @@ private void closeBlockInStream(BlockInStream stream) throws IOException { if (stream == mBlockInStream) { // if stream is instance variable, set to null mBlockInStream = null; } + if (stream == mCachedPositionedReadStream) { + mCachedPositionedReadStream = null; + } if (blockSource == BlockInStream.BlockInStreamSource.NODE_LOCAL || blockSource == BlockInStream.BlockInStreamSource.PROCESS_LOCAL) { return; @@ -469,7 +476,13 @@ boolean triggerAsyncCaching(BlockInStream stream) { if (mPassiveCachingEnabled && mContext.hasNodeLocalWorker()) { // send request to local worker worker = mContext.getNodeLocalWorker(); - } else { // send request to data source + } else { + if (blockInfo.getLocations().stream() + .anyMatch(it -> Objects.equals(it.getWorkerAddress(), dataSource))) { + mLastBlockIdCached = blockId; + return false; + } + // send request to data source worker = dataSource; } try (CloseableResource blockWorker = @@ -512,4 +525,14 @@ private void handleRetryableException(BlockInStream stream, IOException e) { mFailedWorkers.put(workerAddress, System.currentTimeMillis()); } } + + @Override + public void unbuffer() { + if (mBlockInStream != null) { + mBlockInStream.unbuffer(); + } + if (mCachedPositionedReadStream != null) { + mCachedPositionedReadStream.unbuffer(); + } + } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileOutStream.java b/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileOutStream.java index 5930ddef119e..007cffd65f81 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileOutStream.java +++ b/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileOutStream.java @@ -170,6 +170,8 @@ public void close() throws IOException { } else { mUnderStorageOutputStream.close(); optionsBuilder.setUfsLength(mBytesWritten); + mUnderStorageOutputStream.getDataWriter().getUfsContentHash().ifPresent( + optionsBuilder::setContentHash); } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java index 6babc837edc0..1c4cfe421091 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java @@ -16,12 +16,16 @@ import alluxio.AlluxioURI; import alluxio.Constants; +import alluxio.client.ReadType; +import alluxio.client.WriteType; import alluxio.client.block.BlockStoreClient; import alluxio.client.block.BlockWorkerInfo; import 
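
A note on the content-hash plumbing above: LocalFileDataWriter never persists to the UFS, so it reports Optional.empty(), while UfsFallbackLocalFileDataWriter delegates to its gRPC writer; AlluxioFileOutStream.close() then copies the hash into the CompleteFile options only when one is present. A minimal sketch of that hand-off, assuming the element type is Optional<String> (generic parameters appear type-erased in the reflowed diff text); HashAwareWriter is a hypothetical stand-in for Alluxio's DataWriter:

import java.util.Optional;

// Sketch of the Optional-based content-hash hand-off; not the real DataWriter API.
interface HashAwareWriter {
  Optional<String> getUfsContentHash();
}

class CompleteOptionsSketch {
  private String mContentHash; // mirrors optionsBuilder.setContentHash in the diff

  void completeWith(HashAwareWriter writer) {
    // Local/short-circuit writers return Optional.empty(); only a writer that
    // actually persisted to the UFS supplies a hash, so the option is set
    // conditionally, like ifPresent(optionsBuilder::setContentHash) above.
    writer.getUfsContentHash().ifPresent(hash -> mContentHash = hash);
  }
}
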
alluxio.client.file.FileSystemContextReinitializer.ReinitBlockerResource; import alluxio.client.file.options.InStreamOptions; import alluxio.client.file.options.OutStreamOptions; import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.ConfigurationBuilder; +import alluxio.conf.OverlayConfiguration; import alluxio.conf.PropertyKey; import alluxio.exception.AlluxioException; import alluxio.exception.DirectoryNotEmptyException; @@ -38,6 +42,7 @@ import alluxio.exception.status.UnauthenticatedException; import alluxio.exception.status.UnavailableException; import alluxio.grpc.Bits; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; @@ -45,6 +50,8 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.LoadMetadataPType; @@ -55,12 +62,18 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.master.MasterInquireClient; import alluxio.resource.CloseableResource; import alluxio.security.authorization.AclEntry; import alluxio.uri.Authority; import alluxio.util.FileSystemOptionsUtils; +import alluxio.util.io.PathUtils; import alluxio.wire.BlockLocation; import alluxio.wire.BlockLocationInfo; import alluxio.wire.FileBlockInfo; @@ -80,6 +93,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.function.Consumer; import javax.annotation.concurrent.ThreadSafe; @@ -90,14 +104,24 @@ */ @ThreadSafe public class BaseFileSystem implements FileSystem { + private static final AlluxioConfiguration DIRECT_ACCESS_CONF = new ConfigurationBuilder() + .setProperty(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL, "0") + .setProperty(PropertyKey.USER_FILE_READ_TYPE_DEFAULT, ReadType.NO_CACHE) + .setProperty(PropertyKey.USER_FILE_WRITE_TYPE_DEFAULT, WriteType.THROUGH).build(); private static final Logger LOG = LoggerFactory.getLogger(BaseFileSystem.class); + /** Used to manage closeable resources. */ private final Closer mCloser = Closer.create(); protected final FileSystemContext mFsContext; protected final BlockStoreClient mBlockStore; + protected List mPathList; protected volatile boolean mClosed = false; + protected static final Error UNREACHABLE_CODE_ERROR = new Error("We should never reach here. " + + "wrapAndThrowAlluxioStatusException is guaranteed " + + "to throw an exception and never returns."); + /** * Constructs a new base file system. 
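
The DIRECT_ACCESS_CONF constant above pins three properties (no metadata sync interval, NO_CACHE reads, THROUGH writes) that the checkDirectAccess/getDirectAccessConf helpers below layer over the per-path configuration whenever a path matches a prefix in USER_FILE_DIRECT_ACCESS. A toy illustration of the overlay lookup, using plain maps rather than the real OverlayConfiguration API (the property-key strings are assumptions for the demo):

import java.util.Map;

// Minimal sketch of the overlay idea: reads hit the direct-access overrides
// first and fall back to the path configuration. Not the Alluxio API.
class OverlaySketch {
  private final Map<String, String> mOverride;
  private final Map<String, String> mInner;

  OverlaySketch(Map<String, String> override, Map<String, String> inner) {
    mOverride = override;
    mInner = inner;
  }

  String get(String key) {
    // Overridden keys win; everything else falls through to the inner conf.
    return mOverride.getOrDefault(key, mInner.get(key));
  }

  public static void main(String[] args) {
    OverlaySketch conf = new OverlaySketch(
        Map.of("alluxio.user.file.writetype.default", "THROUGH"),
        Map.of("alluxio.user.file.writetype.default", "CACHE_THROUGH",
            "alluxio.user.file.readtype.default", "CACHE"));
    System.out.println(conf.get("alluxio.user.file.writetype.default")); // THROUGH
    System.out.println(conf.get("alluxio.user.file.readtype.default")); // CACHE
  }
}
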
* @@ -136,7 +160,7 @@ public void checkAccess(AlluxioURI path, CheckAccessPOptions options) checkUri(path); rpc(client -> { CheckAccessPOptions mergedOptions = FileSystemOptionsUtils - .checkAccessDefaults(mFsContext.getPathConf(path)) + .checkAccessDefaults(getDirectAccessConf(path)) .toBuilder().mergeFrom(options).build(); client.checkAccess(path, mergedOptions); LOG.debug("Checked access {}, options: {}", path.getPath(), mergedOptions); @@ -144,13 +168,38 @@ public void checkAccess(AlluxioURI path, CheckAccessPOptions options) }); } + private boolean checkDirectAccess(AlluxioURI uri) { + if (!getConf().isSet(PropertyKey.USER_FILE_DIRECT_ACCESS)) { + return false; + } + if (mPathList == null) { + mPathList = getConf().getList(PropertyKey.USER_FILE_DIRECT_ACCESS); + } + return mPathList.stream().anyMatch(x -> { + try { + return PathUtils.hasPrefix(uri.getPath(), x); + } catch (InvalidPathException e) { + return false; + } + }); + } + + private AlluxioConfiguration getDirectAccessConf(AlluxioURI uri) { + AlluxioConfiguration inner = mFsContext.getPathConf(uri); + if (checkDirectAccess(uri)) { + return new OverlayConfiguration(DIRECT_ACCESS_CONF, inner); + } else { + return inner; + } + } + @Override public void createDirectory(AlluxioURI path, CreateDirectoryPOptions options) throws FileAlreadyExistsException, InvalidPathException, IOException, AlluxioException { checkUri(path); rpc(client -> { CreateDirectoryPOptions mergedOptions = FileSystemOptionsUtils.createDirectoryDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); client.createDirectory(path, mergedOptions); LOG.debug("Created directory {}, options: {}", path.getPath(), mergedOptions); return null; @@ -158,17 +207,17 @@ public void createDirectory(AlluxioURI path, CreateDirectoryPOptions options) } @Override - public FileOutStream createFile(AlluxioURI path, CreateFilePOptions options) + public FileOutStream createFile(AlluxioURI path, final CreateFilePOptions options) throws FileAlreadyExistsException, InvalidPathException, IOException, AlluxioException { checkUri(path); return rpc(client -> { CreateFilePOptions mergedOptions = FileSystemOptionsUtils.createFileDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); URIStatus status = client.createFile(path, mergedOptions); LOG.debug("Created file {}, options: {}", path.getPath(), mergedOptions); OutStreamOptions outStreamOptions = new OutStreamOptions(mergedOptions, mFsContext, - mFsContext.getPathConf(path)); + getDirectAccessConf(path)); outStreamOptions.setUfsPath(status.getUfsPath()); outStreamOptions.setMountId(status.getMountId()); outStreamOptions.setAcl(status.getAcl()); @@ -187,7 +236,7 @@ public void delete(AlluxioURI path, DeletePOptions options) checkUri(path); rpc(client -> { DeletePOptions mergedOptions = FileSystemOptionsUtils.deleteDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); client.delete(path, mergedOptions); LOG.debug("Deleted {}, options: {}", path.getPath(), mergedOptions); return null; @@ -200,7 +249,7 @@ public boolean exists(AlluxioURI path, final ExistsPOptions options) checkUri(path); return rpc(client -> { ExistsPOptions mergedOptions = FileSystemOptionsUtils.existsDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + 
getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); return client.exists(path, mergedOptions); }); } @@ -210,7 +259,7 @@ public void free(AlluxioURI path, final FreePOptions options) throws FileDoesNotExistException, IOException, AlluxioException { checkUri(path); rpc(client -> { - FreePOptions mergedOptions = FileSystemOptionsUtils.freeDefaults(mFsContext.getPathConf(path)) + FreePOptions mergedOptions = FileSystemOptionsUtils.freeDefaults(getDirectAccessConf(path)) .toBuilder().mergeFrom(options).build(); client.free(path, mergedOptions); LOG.debug("Freed {}, options: {}", path.getPath(), mergedOptions); @@ -266,7 +315,7 @@ public URIStatus getStatus(AlluxioURI path, final GetStatusPOptions options) checkUri(path); URIStatus status = rpc(client -> { GetStatusPOptions mergedOptions = FileSystemOptionsUtils.getStatusDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); return client.getStatus(path, mergedOptions); }); if (!status.isCompleted()) { @@ -282,7 +331,7 @@ public List listStatus(AlluxioURI path, final ListStatusPOptions opti return rpc(client -> { // TODO(calvin): Fix the exception handling in the master ListStatusPOptions mergedOptions = FileSystemOptionsUtils.listStatusDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); return client.listStatus(path, mergedOptions); }); } @@ -295,7 +344,7 @@ public void iterateStatus(AlluxioURI path, final ListStatusPOptions options, rpc(client -> { // TODO(calvin): Fix the exception handling in the master ListStatusPOptions mergedOptions = FileSystemOptionsUtils.listStatusDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); client.iterateStatus(path, mergedOptions, action); return null; }); @@ -308,7 +357,7 @@ public ListStatusPartialResult listStatusPartial( checkUri(path); return rpc(client -> { ListStatusPartialPOptions mergedOptions = FileSystemOptionsUtils.listStatusPartialDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); return client.listStatusPartial(path, mergedOptions); }); } @@ -319,7 +368,7 @@ public void loadMetadata(AlluxioURI path, final ListStatusPOptions options) checkUri(path); rpc(client -> { ListStatusPOptions mergedOptions = FileSystemOptionsUtils.listStatusDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options) + getDirectAccessConf(path)).toBuilder().mergeFrom(options) .setLoadMetadataType(LoadMetadataPType.ALWAYS).setLoadMetadataOnly(true).build(); client.listStatus(path, mergedOptions); return null; @@ -371,7 +420,7 @@ public void persist(final AlluxioURI path, final ScheduleAsyncPersistencePOption rpc(client -> { ScheduleAsyncPersistencePOptions mergedOptions = FileSystemOptionsUtils - .scheduleAsyncPersistDefaults(mFsContext.getPathConf(path)).toBuilder() + .scheduleAsyncPersistDefaults(getDirectAccessConf(path)).toBuilder() .mergeFrom(options).build(); client.scheduleAsyncPersist(path, mergedOptions); LOG.debug("Scheduled persist for {}, options: {}", path.getPath(), mergedOptions); @@ -384,12 +433,12 @@ public FileInStream openFile(AlluxioURI path, OpenFilePOptions options) throws FileDoesNotExistException, OpenDirectoryException, FileIncompleteException, IOException, AlluxioException { 
checkUri(path); - AlluxioConfiguration conf = mFsContext.getPathConf(path); - URIStatus status = getStatus(path, - FileSystemOptionsUtils.getStatusDefaults(conf).toBuilder() - .setAccessMode(Bits.READ) - .setUpdateTimestamps(options.getUpdateLastAccessTime()) - .build()); + AlluxioConfiguration conf = getDirectAccessConf(path); + GetStatusPOptions opt = FileSystemOptionsUtils.getStatusDefaults(conf) + .toBuilder() + .setAccessMode(Bits.READ) + .setUpdateTimestamps(options.getUpdateLastAccessTime()).build(); + URIStatus status = getStatus(path, opt); return openFile(status, options); } @@ -404,7 +453,7 @@ public FileInStream openFile(URIStatus status, OpenFilePOptions options) if (!status.isCompleted()) { throw new FileIncompleteException(path); } - AlluxioConfiguration conf = mFsContext.getPathConf(path); + AlluxioConfiguration conf = getDirectAccessConf(path); OpenFilePOptions mergedOptions = FileSystemOptionsUtils.openFileDefaults(conf) .toBuilder().mergeFrom(options).build(); InStreamOptions inStreamOptions = new InStreamOptions(status, mergedOptions, conf, mFsContext); @@ -442,7 +491,7 @@ public void setAcl(AlluxioURI path, SetAclAction action, List entries, checkUri(path); rpc(client -> { SetAclPOptions mergedOptions = FileSystemOptionsUtils.setAclDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); client.setAcl(path, action, entries, mergedOptions); LOG.debug("Set ACL for {}, entries: {} options: {}", path.getPath(), entries, mergedOptions); @@ -455,7 +504,7 @@ public void setAttribute(AlluxioURI path, SetAttributePOptions options) throws FileDoesNotExistException, IOException, AlluxioException { checkUri(path); SetAttributePOptions mergedOptions = - FileSystemOptionsUtils.setAttributeClientDefaults(mFsContext.getPathConf(path)) + FileSystemOptionsUtils.setAttributeClientDefaults(getDirectAccessConf(path)) .toBuilder().mergeFrom(options).build(); rpc(client -> { client.setAttribute(path, mergedOptions); @@ -499,7 +548,7 @@ public void unmount(AlluxioURI path, UnmountPOptions options) checkUri(path); rpc(client -> { UnmountPOptions mergedOptions = FileSystemOptionsUtils.unmountDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); client.unmount(path); LOG.debug("Unmounted {}, options: {}", path.getPath(), mergedOptions); return null; @@ -516,6 +565,67 @@ public void needsSync(AlluxioURI path) }); } + @Override + public Optional submitJob(JobRequest jobRequest) { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().submitJob(jobRequest); + } + } + + @Override + public boolean stopJob(JobDescription jobDescription) { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().stopJob(jobDescription); + } + } + + @Override + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().getJobProgress(jobDescription, format, verbose); + } + } + + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().syncMetadata(path, options); + } + } + + 
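
The rpc() refactor further down in this file funnels every AlluxioStatusException through wrapAndThrowAlluxioStatusException, which always throws; since javac cannot prove that, the call site throws a pre-built UNREACHABLE_CODE_ERROR to satisfy definite-return analysis. A compact, self-contained sketch of the same pattern, with generic JDK exception types standing in for Alluxio's status exceptions:

// Sketch of the "helper always throws" pattern used by rpc() below.
class StatusMappingSketch {
  static final Error UNREACHABLE = new Error("unreachable: wrapAndThrow always throws");

  static void wrapAndThrow(RuntimeException e) throws Exception {
    // instanceof chain replaces a stack of catch clauses, so every mapping
    // lives in one reusable method instead of being repeated per RPC.
    if (e instanceof IllegalArgumentException) {
      throw new Exception("invalid path: " + e.getMessage(), e);
    }
    throw new Exception(e);
  }

  static String call() throws Exception {
    try {
      return doRpc();
    } catch (RuntimeException e) {
      wrapAndThrow(e);
      throw UNREACHABLE; // never executed; placates definite-return analysis
    }
  }

  static String doRpc() {
    return "ok";
  }
}
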
@Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().syncMetadataAsync(path, options); + } + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) + throws FileDoesNotExistException, IOException, AlluxioException { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().getSyncProgress(taskGroupId); + } + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws IOException, AlluxioException { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().cancelSyncMetadata(taskGroupId); + } + } + /** * Checks an {@link AlluxioURI} for scheme and authority information. Warn the user and throw an * exception if necessary. @@ -589,22 +699,38 @@ R rpc(RpcCallable fn) // Explicitly connect to trigger loading configuration from meta master. client.get().connect(); return fn.call(client.get()); - } catch (NotFoundException e) { + } catch (AlluxioStatusException e) { + wrapAndThrowAlluxioStatusException(e); + throw UNREACHABLE_CODE_ERROR; + } + } + + protected void wrapAndThrowAlluxioStatusException(AlluxioStatusException e) + throws AlluxioException, IOException { + if (e instanceof NotFoundException) { throw new FileDoesNotExistException(e.getMessage()); - } catch (AlreadyExistsException e) { + } + if (e instanceof AlreadyExistsException) { throw new FileAlreadyExistsException(e.getMessage()); - } catch (InvalidArgumentException e) { + } + if (e instanceof InvalidArgumentException) { throw new InvalidPathException(e.getMessage()); - } catch (FailedPreconditionException e) { + } + if (e instanceof FailedPreconditionException) { // A little sketchy, but this should be the only case that throws FailedPrecondition. throw new DirectoryNotEmptyException(e.getMessage()); - } catch (UnavailableException e) { - throw e; - } catch (UnauthenticatedException e) { + } + if (e instanceof UnavailableException || e instanceof UnauthenticatedException) { throw e; - } catch (AlluxioStatusException e) { - throw e.toAlluxioException(); } + throw e.toAlluxioException(); + } + + /** + * @return the file system context + */ + public FileSystemContext getFileSystemContext() { + return mFsContext; } /** diff --git a/core/client/fs/src/main/java/alluxio/client/file/ConfigHashSync.java b/core/client/fs/src/main/java/alluxio/client/file/ConfigHashSync.java index 144be4e7f6f1..b94ffd6d0651 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/ConfigHashSync.java +++ b/core/client/fs/src/main/java/alluxio/client/file/ConfigHashSync.java @@ -71,7 +71,7 @@ public Optional getException() { } @Override - public synchronized void heartbeat() { + public synchronized void heartbeat(long timeLimitMs) { if (!mContext.getClientContext().getClusterConf().clusterDefaultsLoaded()) { // Wait until the initial cluster defaults are loaded. 
return; diff --git a/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java index d0b5e1801721..e8d63da69e82 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java @@ -20,6 +20,7 @@ import alluxio.exception.FileIncompleteException; import alluxio.exception.InvalidPathException; import alluxio.exception.OpenDirectoryException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; @@ -27,6 +28,8 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.MountPOptions; @@ -36,7 +39,12 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.security.authorization.AclEntry; import alluxio.wire.BlockLocationInfo; import alluxio.wire.MountPointInfo; @@ -45,6 +53,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.Consumer; /** @@ -241,8 +250,55 @@ public void needsSync(AlluxioURI path) throws IOException, AlluxioException { mDelegatedFileSystem.needsSync(path); } + @Override + public Optional submitJob(JobRequest jobRequest) { + return mDelegatedFileSystem.submitJob(jobRequest); + } + + @Override + public boolean stopJob(JobDescription jobDescription) { + return mDelegatedFileSystem.stopJob(jobDescription); + } + + @Override + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { + return mDelegatedFileSystem.getJobProgress(jobDescription, format, verbose); + } + + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + return mDelegatedFileSystem.syncMetadata(path, options); + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + return mDelegatedFileSystem.syncMetadataAsync(path, options); + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) + throws FileDoesNotExistException, IOException, AlluxioException { + return mDelegatedFileSystem.getSyncProgress(taskGroupId); + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws IOException, AlluxioException { + return mDelegatedFileSystem.cancelSyncMetadata(taskGroupId); + } + @Override public void close() throws IOException { mDelegatedFileSystem.close(); } + + /** + * @return the underlying fileSystem + */ + public FileSystem getUnderlyingFileSystem() { + return mDelegatedFileSystem; + } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileInStream.java 
b/core/client/fs/src/main/java/alluxio/client/file/FileInStream.java index 473ca4e9bf4d..cc61f7fb2be0 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileInStream.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileInStream.java @@ -13,6 +13,7 @@ import alluxio.Seekable; import alluxio.client.BoundedStream; +import alluxio.client.CanUnbuffer; import alluxio.client.PositionedReadable; import alluxio.exception.PreconditionMessage; import alluxio.util.io.BufferUtils; @@ -29,7 +30,7 @@ * into a given offset of the stream to read. */ public abstract class FileInStream extends InputStream - implements BoundedStream, PositionedReadable, Seekable { + implements BoundedStream, PositionedReadable, Seekable, CanUnbuffer { private final byte[] mSingleByte = new byte[1]; @Override @@ -101,4 +102,8 @@ public int read(ByteBuffer byteBuffer, int off, int len) throws IOException { } return nread; } + + @Override + public void unbuffer() { + } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java index cbf6a9534c01..61778f425346 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java @@ -30,6 +30,7 @@ import alluxio.exception.InvalidPathException; import alluxio.exception.OpenDirectoryException; import alluxio.exception.status.AlluxioStatusException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; @@ -37,6 +38,8 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.LoadMetadataPOptions; @@ -48,7 +51,12 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.security.authorization.AclEntry; import alluxio.security.user.UserState; import alluxio.util.CommonUtils; @@ -67,6 +75,7 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import javax.security.auth.Subject; @@ -157,6 +166,7 @@ public static FileSystem create(FileSystemContext context) { /** * @param context the FileSystemContext to use with the FileSystem + * @param options the options associate with the FileSystem * @return a new FileSystem instance */ public static FileSystem create(FileSystemContext context, FileSystemOptions options) { @@ -737,4 +747,69 @@ default void unmount(AlluxioURI path) throws IOException, AlluxioException { * @param path the path needing synchronization */ void needsSync(AlluxioURI path) throws IOException, AlluxioException; + + /** + * Submit a job to scheduler. + * + * @param jobRequest the job request + * @return job id if job is submitted, empty if a job with description already exists + */ + Optional submitJob(JobRequest jobRequest); + + /** + * Stop a job in scheduler. 
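
FileInStream now implements CanUnbuffer with a no-op default, and AlluxioFileInStream (earlier) plus LocalCacheFileInStream (later in this patch) forward unbuffer() to whichever inner stream currently holds buffers. A hedged sketch of that delegation shape, with illustrative names rather than the exact Alluxio hierarchy:

import java.io.IOException;
import java.io.InputStream;

// Sketch of the CanUnbuffer pattern: the base class makes unbuffer() a no-op
// so subclasses opt in, and wrappers forward it to the active inner stream.
interface CanUnbufferSketch {
  void unbuffer();
}

abstract class BaseStream extends InputStream implements CanUnbufferSketch {
  @Override
  public void unbuffer() {
    // Default: nothing buffered, nothing to release.
  }
}

class WrappingStream extends BaseStream {
  private BaseStream mInner; // may be null before the first read

  @Override
  public int read() throws IOException {
    return mInner == null ? -1 : mInner.read();
  }

  @Override
  public void unbuffer() {
    if (mInner != null) {
      mInner.unbuffer(); // release buffers/connections held downstream
    }
  }
}
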
+ * + * @param jobDescription the job description + * @return true if job is stopped, false if the job cannot be found + */ + boolean stopJob(JobDescription jobDescription); + + /** + * Get progress of a job. + * + * @param jobDescription the job description + * @param format progress report format + * @param verbose whether to return verbose report + * @return the job progress + */ + String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose); + + /** + * Syncs metadata for a given Alluxio path. + * + * @param path the path to sync metadata on + * @param options options to associate with this operation + * @return the sync metadata response + */ + SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException; + + /** + * Syncs metadata asynchronously for a given Alluxio path. + * + * @param path the path to sync metadata on + * @param options options to associate with this operation + * @return the sync metadata async response + */ + SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException; + + /** + * Gets the sync progress. + * + * @param taskGroupId the task group id + * @return the sync progress + */ + GetSyncProgressPResponse getSyncProgress(long taskGroupId) + throws FileDoesNotExistException, IOException, AlluxioException; + + /** + * Cancels an ongoing metadata sync. + * + * @param taskGroupId the task group id + * @return the cancellation result + */ + CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws IOException, AlluxioException; } diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java index d7dd61b218c9..9843c09b3c53 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java @@ -414,13 +414,16 @@ public ReinitBlockerResource blockReinit() { */ public void reinit(boolean updateClusterConf, boolean updatePathConf) throws UnavailableException, IOException { + // Inquire the primary master address before entering the critical section of mReinitializer, + // where all RPCs that wait on the monitor object of FileSystemContext (synchronized methods) + // will block until reinitialization completes. + InetSocketAddress masterAddr; + try { + masterAddr = getMasterAddress(); + } catch (IOException e) { + throw new UnavailableException("Failed to get master address during reinitialization", e); + } try (Closeable r = mReinitializer.allow()) { - InetSocketAddress masterAddr; - try { - masterAddr = getMasterAddress(); - } catch (IOException e) { - throw new UnavailableException("Failed to get master address during reinitialization", e); - } try { getClientContext().loadConf(masterAddr, updateClusterConf, updatePathConf); } catch (AlluxioStatusException e) { @@ -431,7 +434,7 @@ public void reinit(boolean updateClusterConf, boolean updatePathConf) + "meta master (%s) during reinitialization", masterAddr), e); } LOG.debug("Reinitializing FileSystemContext: update cluster conf: {}, update path conf:" + " {}", updateClusterConf, updatePathConf); closeContext(); ReconfigurableRegistry.update(); initContext(getClientContext(), mMasterAddresses != null @@ -719,11 +722,9 @@ private void 
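
Putting the new FileSystem job methods together, a hedged usage sketch: the diff does not show how JobRequest or JobDescription instances are built, so both are taken as opaque parameters here, and JobProgressReportFormat.TEXT is assumed to be a valid format value; only the call shapes come from the interface additions above.

// Illustrative caller of the new job API; not a complete program.
class JobApiUsageSketch {
  void runAndTrack(alluxio.client.file.FileSystem fs, alluxio.job.JobRequest request,
      alluxio.job.JobDescription description) {
    // submitJob returns an empty Optional when an equivalent job already exists.
    fs.submitJob(request).ifPresent(jobId -> System.out.println("submitted job " + jobId));
    // Poll a progress report; verbose=true requests a more detailed report.
    String report = fs.getJobProgress(description,
        alluxio.grpc.JobProgressReportFormat.TEXT, true);
    System.out.println(report);
    if (!fs.stopJob(description)) {
      System.out.println("no such job to stop");
    }
  }
}
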
initializeLocalWorker() throws IOException { */ private List getWorkerAddresses() throws IOException { List infos; - BlockMasterClient blockMasterClient = mBlockMasterClientPool.acquire(); - try { - infos = blockMasterClient.getWorkerInfoList(); - } finally { - mBlockMasterClientPool.release(blockMasterClient); + try (CloseableResource masterClientResource = + acquireBlockMasterClientResource()) { + infos = masterClientResource.get().getWorkerInfoList(); } if (infos.isEmpty()) { throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage()); diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystemContextReinitializer.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystemContextReinitializer.java index 78ae526be8e6..ae7e9049e95c 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystemContextReinitializer.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystemContextReinitializer.java @@ -66,7 +66,7 @@ public FileSystemContextReinitializer(FileSystemContext context) { mExecutor = new ConfigHashSync(context); mFuture = REINIT_EXECUTOR.scheduleAtFixedRate(() -> { try { - mExecutor.heartbeat(); + mExecutor.heartbeat(Long.MAX_VALUE); } catch (Exception e) { LOG.error("Uncaught exception in config heartbeat executor, shutting down", e); } diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java index 3d643329d21f..43e81540be9f 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java @@ -16,6 +16,7 @@ import alluxio.exception.status.AlluxioStatusException; import alluxio.exception.status.AlreadyExistsException; import alluxio.exception.status.NotFoundException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CheckConsistencyPOptions; import alluxio.grpc.CompleteFilePOptions; @@ -25,6 +26,8 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.MountPOptions; @@ -33,7 +36,12 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UpdateUfsModePOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.master.MasterClientContext; import alluxio.security.authorization.AclEntry; import alluxio.wire.MountPointInfo; @@ -41,6 +49,7 @@ import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.Consumer; /** @@ -342,4 +351,65 @@ void updateUfsMode(AlluxioURI ufsUri, UpdateUfsModePOptions options) * @param path the path to invalidate */ void needsSync(AlluxioURI path) throws AlluxioStatusException; + + /** + * Submit a job to scheduler. + * + * @param job the job request to submit + * @return jobId if job is submitted, empty if a job already exists + */ + Optional submitJob(JobRequest job); + + /** + * Stop a job. 
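
The getWorkerAddresses() change above swaps a manual acquire/try/finally/release dance for a try-with-resources CloseableResource. A standalone sketch of why that shape is safer; ClientPool and Lease are illustrative, not Alluxio classes:

import java.util.ArrayDeque;
import java.util.Deque;

// Sketch of the acquire/release-to-try-with-resources refactor shown above.
class ClientPool<T> {
  private final Deque<T> mIdle = new ArrayDeque<>();

  // Wrap the acquired client so close() returns it to the pool instead of
  // leaking it when a caller forgets the finally block.
  Lease<T> acquire(T fallback) {
    T client = mIdle.isEmpty() ? fallback : mIdle.pop();
    return new Lease<>(this, client);
  }

  void release(T client) {
    mIdle.push(client);
  }

  static class Lease<T> implements AutoCloseable {
    private final ClientPool<T> mPool;
    private final T mClient;

    Lease(ClientPool<T> pool, T client) {
      mPool = pool;
      mClient = client;
    }

    T get() {
      return mClient;
    }

    @Override
    public void close() {
      mPool.release(mClient); // always runs, even if the RPC throws
    }
  }
}
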
+ * + * @param jobDescription job description be stopped + * @return true if job is stopped, false if we cannot find job + */ + boolean stopJob(JobDescription jobDescription); + + /** + * Get progress of a job. + * + * @param jobDescription job description to get progress + * @param format progress report format + * @param verbose whether to return verbose report + * @return the job progress + */ + String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose); + + /** + * Syncs metadata for a given alluxio path. + * + * @param path the path to sync metadata on + * @param options options to associate with this operation + * @return the sync metadata response + */ + SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException; + + /** + * Syncs metadata for a given alluxio path asynchronously. + * + * @param path the path to sync metadata on + * @param options options to associate with this operation + * @return the sync metadata response + */ + SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException; + + /** + * Gets the sync progress. + * @param taskGroupId the task group id + * @return the sync progress + */ + GetSyncProgressPResponse getSyncProgress(long taskGroupId) throws AlluxioStatusException; + + /** + * Cancels an ongoing metadata sync. + * @param taskGroupId the task group id + * @return the cancellation result + */ + CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) throws AlluxioStatusException; } diff --git a/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java b/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java index ecd6f6962884..42d714f76fe7 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java @@ -15,6 +15,8 @@ import alluxio.AlluxioURI; import alluxio.Constants; import alluxio.exception.status.AlluxioStatusException; +import alluxio.grpc.CancelSyncMetadataPRequest; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CheckAccessPRequest; import alluxio.grpc.CheckConsistencyPOptions; @@ -33,6 +35,8 @@ import alluxio.grpc.FreePOptions; import alluxio.grpc.FreePRequest; import alluxio.grpc.GetFilePathPRequest; +import alluxio.grpc.GetJobProgressPRequest; +import alluxio.grpc.GetJobProgressPResponse; import alluxio.grpc.GetMountTablePRequest; import alluxio.grpc.GetNewBlockIdForFilePOptions; import alluxio.grpc.GetNewBlockIdForFilePRequest; @@ -41,7 +45,11 @@ import alluxio.grpc.GetStatusPOptions; import alluxio.grpc.GetStatusPRequest; import alluxio.grpc.GetSyncPathListPRequest; +import alluxio.grpc.GetSyncProgressPRequest; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.GrpcUtils; +import alluxio.grpc.JobProgressPOptions; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPRequest; import alluxio.grpc.ListStatusPartialPOptions; @@ -61,18 +69,30 @@ import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.SetAttributePRequest; import alluxio.grpc.StartSyncPRequest; +import alluxio.grpc.StopJobPRequest; +import alluxio.grpc.StopJobPResponse; import alluxio.grpc.StopSyncPRequest; +import alluxio.grpc.SubmitJobPRequest; +import 
alluxio.grpc.SubmitJobPResponse; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPRequest; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; import alluxio.grpc.UnmountPRequest; import alluxio.grpc.UpdateMountPRequest; import alluxio.grpc.UpdateUfsModePOptions; import alluxio.grpc.UpdateUfsModePRequest; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.master.MasterClientContext; import alluxio.retry.CountingRetry; import alluxio.security.authorization.AclEntry; import alluxio.util.FileSystemOptionsUtils; import alluxio.wire.SyncPointInfo; +import com.google.protobuf.ByteString; +import org.apache.commons.lang3.SerializationUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -80,6 +100,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Spliterator; import java.util.Spliterators; import java.util.function.Consumer; @@ -418,6 +439,98 @@ public void needsSync(AlluxioURI path) throws AlluxioStatusException { RPC_LOG, "NeedsSync", "path=%s", path); } + @Override + public Optional submitJob(JobRequest job) { + connectWithRuntimeException(); + final ByteString requestBody = ByteString.copyFrom(SerializationUtils.serialize(job)); + SubmitJobPRequest request = SubmitJobPRequest + .newBuilder() + .setRequestBody(requestBody) + .build(); + SubmitJobPResponse response = mClient.submitJob(request); + return response.hasJobId() ? Optional.of(response.getJobId()) : Optional.empty(); + } + + @Override + public boolean stopJob(JobDescription jobDescription) { + connectWithRuntimeException(); + StopJobPResponse response = mClient.stopJob(StopJobPRequest + .newBuilder() + .setJobDescription(alluxio.grpc.JobDescription + .newBuilder() + .setType(jobDescription.getType()) + .setPath(jobDescription.getPath()) + .build()) + .build()); + return response.getJobStopped(); + } + + @Override + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { + JobProgressPOptions.Builder options = JobProgressPOptions.newBuilder() + .setVerbose(verbose) + .setFormat(format); + connectWithRuntimeException(); + GetJobProgressPResponse response = mClient.getJobProgress( + GetJobProgressPRequest.newBuilder() + .setJobDescription(alluxio.grpc.JobDescription + .newBuilder() + .setType(jobDescription.getType()) + .setPath(jobDescription.getPath()) + .build()) + .setOptions(options.build()) + .build()); + return response.getProgressReport(); + } + + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException { + return retryRPC(() -> { + SyncMetadataPRequest request = SyncMetadataPRequest.newBuilder() + .setPath(path.getPath()) + .setOptions(options) + .build(); + SyncMetadataPResponse response = mClient.syncMetadata(request); + return response; + }, RPC_LOG, "SyncMetadata", "path=%s,options=%s", path, options); + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException { + return retryRPC(() -> { + SyncMetadataPRequest request = SyncMetadataPRequest.newBuilder() + .setPath(path.getPath()) + .setOptions(options) + .build(); + SyncMetadataAsyncPResponse response = mClient.syncMetadataAsync(request); + return response; + }, RPC_LOG, "SyncMetadataAsync", "path=%s,options=%s", path, options); + } + + 
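
The submitJob client above java-serializes the JobRequest with Commons Lang SerializationUtils and ships the bytes in a protobuf bytes field. A self-contained round-trip of that encoding, with a placeholder request type rather than alluxio.job.JobRequest:

import com.google.protobuf.ByteString;
import org.apache.commons.lang3.SerializationUtils;

import java.io.Serializable;

// Round-trip sketch of the request-body encoding used by submitJob above.
public class WireSketch {
  static class DemoRequest implements Serializable {
    private static final long serialVersionUID = 1L;
    final String mPath;
    DemoRequest(String path) {
      mPath = path;
    }
  }

  public static void main(String[] args) {
    DemoRequest request = new DemoRequest("/data/input");
    // Client side: Serializable -> byte[] -> ByteString (protobuf bytes field)
    ByteString body = ByteString.copyFrom(SerializationUtils.serialize(request));
    // Server side: ByteString -> byte[] -> object
    DemoRequest decoded = SerializationUtils.deserialize(body.toByteArray());
    System.out.println(decoded.mPath); // /data/input
  }
}
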
@Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) throws AlluxioStatusException { + return retryRPC(() -> { + GetSyncProgressPRequest request = GetSyncProgressPRequest.newBuilder() + .setTaskGroupId(taskGroupId) + .build(); + return mClient.getSyncProgress(request); + }, RPC_LOG, "GetSyncProgress", "taskGroupId=%s", taskGroupId); + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws AlluxioStatusException { + return retryRPC(() -> { + CancelSyncMetadataPRequest request = CancelSyncMetadataPRequest.newBuilder() + .setTaskGroupId(taskGroupId) + .build(); + return mClient.cancelSyncMetadata(request); + }, RPC_LOG, "CancelSyncMetadata", "taskGroupId=%s", taskGroupId); + } + /** * Gets the path that will be transported to master. * diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManager.java b/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManager.java index 4f0e8f5c1fb4..2ca67855a047 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManager.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManager.java @@ -30,6 +30,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Predicate; import javax.annotation.concurrent.GuardedBy; /** @@ -297,4 +298,12 @@ default List getCachedPageIdsByFileId(String fileId, long fileLength) { * @return true if append was successful */ boolean append(PageId pageId, int appendAt, byte[] page, CacheContext cacheContext); + + /** + * Invalidate the pages that match the given predicate. + * @param predicate + */ + default void invalidate(Predicate predicate) { + throw new UnsupportedOperationException(); + } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManagerOptions.java b/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManagerOptions.java index c55155758a6e..40c42f501423 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManagerOptions.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManagerOptions.java @@ -30,6 +30,9 @@ public class CacheManagerOptions { private long mPageSize; private List mPageStoreOptions; private boolean mQuotaEnabled; + private boolean mTtlEnabled; + private long mTtlCheckIntervalSeconds; + private long mTtlThresholdSeconds; /** * @param conf @@ -50,6 +53,10 @@ public static CacheManagerOptions create(AlluxioConfiguration conf) { .setMaxEvictionRetries(conf.getInt(PropertyKey.USER_CLIENT_CACHE_EVICTION_RETRIES)) .setPageSize(conf.getBytes(PropertyKey.USER_CLIENT_CACHE_PAGE_SIZE)) .setQuotaEnabled(conf.getBoolean(PropertyKey.USER_CLIENT_CACHE_QUOTA_ENABLED)) + .setTtlEnabled(conf.getBoolean(PropertyKey.USER_CLIENT_CACHE_TTL_ENABLED)) + .setTtlCheckIntervalSeconds( + conf.getLong(PropertyKey.USER_CLIENT_CACHE_TTL_CHECK_INTERVAL_SECONDS)) + .setTtlThresholdSeconds(conf.getLong(PropertyKey.USER_CLIENT_CACHE_TTL_THRESHOLD_SECONDS)) .setCacheEvictorOptions(cacheEvictorOptions) .setPageStoreOptions(PageStoreOptions.create(conf)); return options; @@ -113,6 +120,28 @@ public boolean isQuotaEnabled() { return mQuotaEnabled; } + /** + * @return if cache ttl is enabled + */ + public boolean isTtlEnabled() { + return mTtlEnabled; + } + + /** + * @return the check interval of ttl + */ + public long getTtlCheckIntervalSeconds() { + return mTtlCheckIntervalSeconds; + } + + /** + * + * @return the time 
threshold of cache ttl + */ + public long getTtlThresholdSeconds() { + return mTtlThresholdSeconds; + } + /** * @return max eviction retires */ @@ -213,4 +242,31 @@ public CacheManagerOptions setPageStoreOptions( mPageStoreOptions = pageStoreOptions; return this; } + + /** + * @param isTtlEnabled + * @return the updated options + */ + public CacheManagerOptions setTtlEnabled(boolean isTtlEnabled) { + mTtlEnabled = isTtlEnabled; + return this; + } + + /** + * @param checkIntervalSeconds + * @return the updated options + */ + public CacheManagerOptions setTtlCheckIntervalSeconds(long checkIntervalSeconds) { + mTtlCheckIntervalSeconds = checkIntervalSeconds; + return this; + } + + /** + * @param thresholdSeconds + * @return the updated options + */ + public CacheManagerOptions setTtlThresholdSeconds(long thresholdSeconds) { + mTtlThresholdSeconds = thresholdSeconds; + return this; + } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileInStream.java b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileInStream.java index f8784a74b43d..fa78f002c55e 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileInStream.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileInStream.java @@ -309,6 +309,13 @@ public void seek(long pos) { mPosition = pos; } + @Override + public void unbuffer() { + if (mExternalFileInStream != null) { + mExternalFileInStream.unbuffer(); + } + } + /** * Convenience method to ensure the stream is not closed. */ diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileSystem.java index a54ef7eaacc8..ed541972ce6b 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileSystem.java @@ -16,6 +16,7 @@ import alluxio.client.file.FileInStream; import alluxio.client.file.FileSystem; import alluxio.client.file.URIStatus; +import alluxio.client.file.cache.filter.CacheFilter; import alluxio.conf.AlluxioConfiguration; import alluxio.exception.AlluxioException; import alluxio.grpc.OpenFilePOptions; @@ -32,6 +33,7 @@ public class LocalCacheFileSystem extends DelegatingFileSystem { private static final Logger LOG = LoggerFactory.getLogger(LocalCacheFileSystem.class); private final CacheManager mCacheManager; + private final CacheFilter mCacheFilter; private final AlluxioConfiguration mConf; /** @@ -43,6 +45,7 @@ public LocalCacheFileSystem(CacheManager cacheManage, FileSystem fs, AlluxioConf super(fs); mCacheManager = Preconditions.checkNotNull(cacheManage, "cacheManager"); mConf = Preconditions.checkNotNull(conf, "conf"); + mCacheFilter = CacheFilter.create(conf); } @Override @@ -62,7 +65,8 @@ public FileInStream openFile(AlluxioURI path, OpenFilePOptions options) @Override public FileInStream openFile(URIStatus status, OpenFilePOptions options) throws IOException, AlluxioException { - if (mCacheManager == null || mCacheManager.state() == CacheManager.State.NOT_IN_USE) { + if (mCacheManager == null || mCacheManager.state() == CacheManager.State.NOT_IN_USE + || !mCacheFilter.needsCache(status)) { return mDelegatedFileSystem.openFile(status, options); } return new LocalCacheFileInStream(status, diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java index 
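
The three TTL knobs added to CacheManagerOptions follow the class's fluent-setter convention: each setter returns the options object so calls chain. A self-contained sketch of that chaining (placeholder class, not the real CacheManagerOptions):

// Fluent-setter sketch mirroring the TTL options added above.
class TtlOptionsSketch {
  private boolean mTtlEnabled;
  private long mTtlCheckIntervalSeconds;
  private long mTtlThresholdSeconds;

  TtlOptionsSketch setTtlEnabled(boolean enabled) {
    mTtlEnabled = enabled;
    return this; // returning `this` is what makes the calls chainable
  }

  TtlOptionsSketch setTtlCheckIntervalSeconds(long seconds) {
    mTtlCheckIntervalSeconds = seconds;
    return this;
  }

  TtlOptionsSketch setTtlThresholdSeconds(long seconds) {
    mTtlThresholdSeconds = seconds;
    return this;
  }

  public static void main(String[] args) {
    TtlOptionsSketch options = new TtlOptionsSketch()
        .setTtlEnabled(true)
        .setTtlCheckIntervalSeconds(60) // scan for expired pages once a minute
        .setTtlThresholdSeconds(3600); // pages older than an hour are evicted
    System.out.println(options.mTtlThresholdSeconds);
  }
}
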
a1dad88b169b..f3edb5db43bc 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java @@ -14,6 +14,8 @@ import static alluxio.client.file.cache.CacheManager.State.NOT_IN_USE; import static alluxio.client.file.cache.CacheManager.State.READ_ONLY; import static alluxio.client.file.cache.CacheManager.State.READ_WRITE; +import static java.util.concurrent.Executors.newScheduledThreadPool; +import static java.util.concurrent.TimeUnit.SECONDS; import alluxio.client.file.CacheContext; import alluxio.client.file.cache.store.ByteArrayTargetBuffer; @@ -44,12 +46,14 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.SynchronousQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Predicate; import javax.annotation.Nullable; import javax.annotation.concurrent.GuardedBy; import javax.annotation.concurrent.ThreadSafe; @@ -88,6 +92,8 @@ public class LocalCacheManager implements CacheManager { private final Optional<ExecutorService> mInitService; /** Executor service for executing the async cache tasks. */ private final Optional<ExecutorService> mAsyncCacheExecutor; + /** Executor service for executing the cache TTL check tasks. */ + private final Optional<ScheduledExecutorService> mTtlEnforcerExecutor; private final ConcurrentHashSet<PageId> mPendingRequests; /** State of this cache. */ private final AtomicReference<CacheManager.State> mState = new AtomicReference<>(); @@ -141,6 +147,21 @@ public static LocalCacheManager create(CacheManagerOptions options, mInitService = options.isAsyncRestoreEnabled() ? 
Optional.of(Executors.newSingleThreadExecutor()) : Optional.empty(); + if (options.isTtlEnabled()) { + mTtlEnforcerExecutor = Optional.of(newScheduledThreadPool(1)); + mTtlEnforcerExecutor.get().scheduleAtFixedRate(() -> + LocalCacheManager.this.invalidate(pageInfo -> { + try { + return System.currentTimeMillis() - pageInfo.getCreatedTimestamp() + >= options.getTtlThresholdSeconds() * 1000; + } catch (Exception ex) { + // In case of any exception, do not invalidate the cache + return false; + } + }), 0, options.getTtlCheckIntervalSeconds(), SECONDS); + } else { + mTtlEnforcerExecutor = Optional.empty(); + } Metrics.registerGauges(mCacheSize, mPageMetaStore); mState.set(READ_ONLY); Metrics.STATE.inc(); @@ -150,7 +171,8 @@ public static LocalCacheManager create(CacheManagerOptions options, * @param pageId page identifier * @return the page lock id */ - private int getPageLockId(PageId pageId) { + @VisibleForTesting + public int getPageLockId(PageId pageId) { return Math.floorMod((int) (pageId.getFileId().hashCode() + pageId.getPageIndex()), LOCK_SIZE); } @@ -639,6 +661,21 @@ public List getCachedPageIdsByFileId(String fileId, long fileLength) { return pageIds; } + @Override + public void invalidate(Predicate predicate) { + mPageStoreDirs.forEach(dir -> { + try { + dir.scanPages(pageInfo -> { + if (pageInfo.isPresent() && predicate.test(pageInfo.get())) { + delete(pageInfo.get().getPageId()); + } + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + @Override public void close() throws Exception { for (PageStoreDir pageStoreDir: mPageStoreDirs) { @@ -647,6 +684,7 @@ public void close() throws Exception { mPageMetaStore.reset(); mInitService.ifPresent(ExecutorService::shutdownNow); mAsyncCacheExecutor.ifPresent(ExecutorService::shutdownNow); + mTtlEnforcerExecutor.ifPresent(ExecutorService::shutdownNow); } /** @@ -679,7 +717,7 @@ private int getPage(PageInfo pageInfo, int pageOffset, int bytesToRead, return -1; } } catch (IOException | PageNotFoundException e) { - LOG.error("Failed to get existing page {} from pageStore", pageInfo.getPageId(), e); + LOG.debug("Failed to get existing page {} from pageStore", pageInfo.getPageId(), e); return -1; } return bytesToRead; diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/PageInfo.java b/core/client/fs/src/main/java/alluxio/client/file/cache/PageInfo.java index 067d4980cc85..5bbaf90b5a9a 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/PageInfo.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/PageInfo.java @@ -28,6 +28,7 @@ public class PageInfo { private final long mPageSize; private final CacheScope mCacheScope; private final PageStoreDir mLocalCacheDir; + private final long mCreatedTimestamp; /** * @param pageId page id @@ -46,10 +47,23 @@ public PageInfo(PageId pageId, long pageSize, PageStoreDir pageStoreDir) { */ public PageInfo(PageId pageId, long pageSize, CacheScope cacheScope, PageStoreDir pageStoreDir) { + this(pageId, pageSize, cacheScope, pageStoreDir, System.currentTimeMillis()); + } + + /** + * @param pageId page id + * @param pageSize page size in bytes + * @param cacheScope scope of this page + * @param pageStoreDir directory of this page + * @param createdTimestamp created time + */ + public PageInfo(PageId pageId, long pageSize, CacheScope cacheScope, + PageStoreDir pageStoreDir, long createdTimestamp) { mPageId = pageId; mPageSize = pageSize; mCacheScope = cacheScope; mLocalCacheDir = pageStoreDir; + mCreatedTimestamp = createdTimestamp; } /** @@ 
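
The TTL enforcer above runs LocalCacheManager.invalidate() on a single-threaded scheduler, deleting any page whose createdTimestamp is older than the threshold; the predicate deliberately returns false on exceptions so a bad page never triggers a spurious eviction. A standalone sketch of the loop, with Entry as a stand-in for PageInfo:

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;

// Standalone sketch of the periodic TTL enforcement loop shown above.
public class TtlSketch {
  static class Entry {
    final long mCreatedMs = System.currentTimeMillis();
  }

  interface Invalidator {
    void invalidate(Predicate<Entry> predicate);
  }

  static ScheduledExecutorService start(Invalidator cache,
      long thresholdSeconds, long checkIntervalSeconds) {
    ScheduledExecutorService executor = Executors.newScheduledThreadPool(1);
    // Same age test as the PR: now - created >= threshold (in millis).
    executor.scheduleAtFixedRate(() -> cache.invalidate(entry ->
        System.currentTimeMillis() - entry.mCreatedMs >= thresholdSeconds * 1000),
        0, checkIntervalSeconds, TimeUnit.SECONDS);
    return executor;
  }
}
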
/** @@ -80,6 +94,13 @@ public PageStoreDir getLocalCacheDir() { return mLocalCacheDir; } + /** + * @return the created time + */ + public long getCreatedTimestamp() { + return mCreatedTimestamp; + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/evictor/TwoChoiceRandomEvictor.java b/core/client/fs/src/main/java/alluxio/client/file/cache/evictor/TwoChoiceRandomEvictor.java new file mode 100644 index 000000000000..8778431f2204 --- /dev/null +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/evictor/TwoChoiceRandomEvictor.java @@ -0,0 +1,94 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.file.cache.evictor; + +import alluxio.client.file.cache.PageId; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.function.Predicate; +import javax.annotation.Nullable; +import javax.annotation.concurrent.ThreadSafe; + +/** + * Two Choice Random client-side cache eviction policy. + * It selects two random page IDs and evicts the one least-recently used. + */ +@ThreadSafe +public class TwoChoiceRandomEvictor implements CacheEvictor { + private final Map<PageId, Long> mCache = Collections.synchronizedMap(new HashMap<>()); + + /** + * Constructor. + * @param options the evictor options (currently unused) + */ + public TwoChoiceRandomEvictor(CacheEvictorOptions options) { + } + + @Override + public void updateOnGet(PageId pageId) { + mCache.put(pageId, Instant.now().toEpochMilli()); + } + + @Override + public void updateOnPut(PageId pageId) { + mCache.put(pageId, Instant.now().toEpochMilli()); + } + + @Override + public void updateOnDelete(PageId pageId) { + mCache.remove(pageId); + } + + @Nullable + @Override + public PageId evict() { + synchronized (mCache) { + if (mCache.isEmpty()) { + return null; + } + + // TODO(chunxu): improve the performance here + List<PageId> keys = new ArrayList<>(mCache.keySet()); + Random rand = new Random(); + PageId key1 = keys.get(rand.nextInt(keys.size())); + PageId key2 = keys.get(rand.nextInt(keys.size())); + if (mCache.get(key1) < mCache.get(key2)) { + return key1; + } + return key2; + } + } + + @Nullable + @Override + public PageId evictMatching(Predicate<PageId> criterion) { + synchronized (mCache) { + for (PageId candidate : mCache.keySet()) { + if (criterion.test(candidate)) { + return candidate; + } + } + return null; + } + } + + @Override + public void reset() { + mCache.clear(); + } +}
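TwoChoiceRandomEvictor is an instance of the power-of-two-choices idea: rather than maintaining a full LRU ordering, it samples two random pages on eviction and evicts the one with the older timestamp, approximating LRU with O(1) metadata updates. A usage sketch based only on the constructors visible in this change (the printed victim is probabilistic by design):

```java
import alluxio.client.file.cache.PageId;
import alluxio.client.file.cache.evictor.CacheEvictorOptions;
import alluxio.client.file.cache.evictor.TwoChoiceRandomEvictor;

public class TwoChoiceExample {
  public static void main(String[] args) {
    TwoChoiceRandomEvictor evictor = new TwoChoiceRandomEvictor(new CacheEvictorOptions());
    evictor.updateOnPut(new PageId("file1", 0)); // timestamped on insert
    evictor.updateOnPut(new PageId("file1", 1));
    evictor.updateOnGet(new PageId("file1", 0)); // refresh: page 0 is now newest
    // Samples two random entries and returns the older of the two, so with
    // two pages the victim is usually page 1 (a duplicate sample can pick page 0).
    PageId victim = evictor.evict();
    System.out.println("evicting " + victim);
  }
}
```

The TODO in evict() points at the remaining cost: copying the key set on every eviction is O(n), even though the two-choice comparison itself is constant time.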
diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStore.java b/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStore.java index 923c4d653c41..446c9e5e5a95 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStore.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStore.java @@ -77,11 +77,11 @@ public void put(PageId pageId, } } catch (Exception e) { Files.deleteIfExists(pagePath); - if (e.getMessage().contains(ERROR_NO_SPACE_LEFT)) { + if (e.getMessage() != null && e.getMessage().contains(ERROR_NO_SPACE_LEFT)) { throw new ResourceExhaustedException( String.format("%s is full, configured with %d bytes", mRoot, mCapacity), e); } - throw new IOException("Failed to write file " + pagePath + " for page " + pageId); + throw new IOException("Failed to write file " + pagePath + " for page " + pageId, e); } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStoreDir.java b/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStoreDir.java index 7426af1c9bf4..3d13ba544313 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStoreDir.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStoreDir.java @@ -17,6 +17,7 @@ import alluxio.client.file.cache.PageInfo; import alluxio.client.file.cache.PageStore; import alluxio.client.file.cache.evictor.CacheEvictor; +import alluxio.client.quota.CacheScope; import com.google.common.base.Preconditions; import org.slf4j.Logger; @@ -25,6 +26,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.attribute.FileTime; import java.util.Optional; import java.util.function.Consumer; import java.util.regex.Matcher; @@ -104,13 +106,17 @@ private Optional<PageInfo> getPageInfo(Path path) { Optional<PageId> pageId = getPageId(path); if (pageId.isPresent()) { long pageSize; + long createdTime; try { pageSize = Files.size(path); + FileTime creationTime = (FileTime) Files.getAttribute(path, "creationTime"); + createdTime = creationTime.toMillis(); } catch (IOException e) { LOG.error("Failed to get file size for " + path, e); return Optional.empty(); } - return Optional.of(new PageInfo(pageId.get(), pageSize, this)); + return Optional.of(new PageInfo(pageId.get(), + pageSize, CacheScope.GLOBAL, this, createdTime)); } return Optional.empty(); } diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/store/PageStoreDir.java b/core/client/fs/src/main/java/alluxio/client/file/cache/store/PageStoreDir.java index cacd8aa397ba..0a3378b18588 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/store/PageStoreDir.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/store/PageStoreDir.java @@ -34,7 +34,7 @@ * Directory of page store. */ public interface PageStoreDir { - Logger LOG = LoggerFactory.getLogger(RocksPageStore.class); + Logger LOG = LoggerFactory.getLogger(PageStoreDir.class); /** * Create a list of PageStoreDir based on the configuration. diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/store/RocksPageStoreDir.java b/core/client/fs/src/main/java/alluxio/client/file/cache/store/RocksPageStoreDir.java index 628c9b386bf3..1f218ca56b44 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/store/RocksPageStoreDir.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/store/RocksPageStoreDir.java @@ -66,8 +66,11 @@ public void reset() throws IOException { @Override public void scanPages(Consumer<Optional<PageInfo>> pageInfoConsumer) { + // Fix thread safety on this iterator or demise RocksPageStore + // https://github.com/Alluxio/alluxio/issues/17131 try (CloseableIterator<Optional<PageInfo>> pageIterator = - RocksUtils.createCloseableIterator(mPageStore.createNewInterator(), this::parsePageInfo)) { + RocksUtils.createCloseableIterator(mPageStore.createNewInterator(), this::parsePageInfo, + () -> null, null)) { Streams.stream(pageIterator).forEach(pageInfoConsumer); } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/options/InStreamOptions.java b/core/client/fs/src/main/java/alluxio/client/file/options/InStreamOptions.java index 31b6c8f2b870..711722831622 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/options/InStreamOptions.java +++ b/core/client/fs/src/main/java/alluxio/client/file/options/InStreamOptions.java @@ -13,6 +13,7 @@ import alluxio.client.ReadType; import alluxio.client.block.policy.BlockLocationPolicy; +import alluxio.client.block.policy.SpecificHostPolicy; import alluxio.client.file.FileSystemContext; import alluxio.client.file.URIStatus; import alluxio.conf.AlluxioConfiguration; @@ -91,7 +92,13 @@ public InStreamOptions(URIStatus status, @Nonnull OpenFilePOptions options, mStatus = status; mProtoOptions = openOptions; - mUfsReadLocationPolicy = context.getReadBlockLocationPolicy(alluxioConf); + if (options.hasUfsReadWorkerLocation()) { + int port = options.getUfsReadWorkerLocation().getRpcPort(); + mUfsReadLocationPolicy = new SpecificHostPolicy( + options.getUfsReadWorkerLocation().getHost(), port == 0 ?
null : port); + } else { + mUfsReadLocationPolicy = context.getReadBlockLocationPolicy(alluxioConf); + } mPositionShort = false; } diff --git a/core/client/fs/src/main/java/alluxio/client/file/options/OutStreamOptions.java b/core/client/fs/src/main/java/alluxio/client/file/options/OutStreamOptions.java index 028ecda49ff5..48d2cf932c9b 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/options/OutStreamOptions.java +++ b/core/client/fs/src/main/java/alluxio/client/file/options/OutStreamOptions.java @@ -16,6 +16,7 @@ import alluxio.client.UnderStorageType; import alluxio.client.WriteType; import alluxio.client.block.policy.BlockLocationPolicy; +import alluxio.client.block.policy.SpecificHostPolicy; import alluxio.client.file.FileSystemContext; import alluxio.conf.AlluxioConfiguration; import alluxio.conf.PropertyKey; @@ -113,6 +114,11 @@ public OutStreamOptions(CreateFilePOptions options, FileSystemContext context, if (options.hasWriteType()) { mWriteType = WriteType.fromProto(options.getWriteType()); } + if (options.hasWorkerLocation()) { + int port = options.getWorkerLocation().getRpcPort(); + mLocationPolicy = new SpecificHostPolicy( + options.getWorkerLocation().getHost(), port == 0 ? null : port); + } } private OutStreamOptions(FileSystemContext context, AlluxioConfiguration alluxioConf) { diff --git a/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java index 5bc1e5ec8821..2341cb93c46b 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java @@ -21,7 +21,9 @@ import alluxio.client.file.options.UfsFileSystemOptions; import alluxio.conf.AlluxioConfiguration; import alluxio.exception.AlluxioException; +import alluxio.exception.FileDoesNotExistException; import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; @@ -30,6 +32,8 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.MountPOptions; @@ -39,16 +43,23 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.resource.CloseableResource; import alluxio.security.authorization.AclEntry; import alluxio.security.authorization.Mode; +import alluxio.underfs.Fingerprint; import alluxio.underfs.UfsFileStatus; import alluxio.underfs.UfsManager; import alluxio.underfs.UfsStatus; import alluxio.underfs.UnderFileSystem; import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; @@ -71,6 +82,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; 
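The InStreamOptions change above (and the matching OutStreamOptions change) lets a caller pin UFS reads and writes to one explicit worker: when the proto options carry a worker location, the configured location policy is bypassed in favor of a SpecificHostPolicy, with a port of 0 treated as "match by host only". A simplified sketch of such host-pinning selection; this is a hypothetical stand-in, not Alluxio's SpecificHostPolicy source:

```java
import java.util.List;
import java.util.Optional;

// Hypothetical host-pinning selection: return the worker matching the
// requested host (and port, when one was given), if it is in the list.
public final class HostPinningPolicy {
  private final String mHost;
  private final Integer mPort; // null when the port is unspecified (0 in the proto)

  public HostPinningPolicy(String host, Integer port) {
    mHost = host;
    mPort = port;
  }

  public Optional<WorkerAddress> getWorker(List<WorkerAddress> workers) {
    return workers.stream()
        .filter(w -> w.mHost.equals(mHost))
        .filter(w -> mPort == null || w.mRpcPort == mPort) // ignore port when null
        .findFirst();
  }

  // Minimal stand-in for alluxio.wire.WorkerNetAddress.
  public static final class WorkerAddress {
    final String mHost;
    final int mRpcPort;

    public WorkerAddress(String host, int rpcPort) {
      mHost = host;
      mRpcPort = rpcPort;
    }
  }
}
```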
+import java.util.Optional; import java.util.function.Consumer; import java.util.stream.Collectors; import javax.annotation.concurrent.ThreadSafe; @@ -214,8 +226,10 @@ public URIStatus getStatus(AlluxioURI path) { public URIStatus getStatus(AlluxioURI path, final GetStatusPOptions options) { return callWithReturn(() -> { String ufsPath = path.getPath(); - return transformStatus(mUfs.get().isFile(ufsPath) - ? mUfs.get().getFileStatus(ufsPath) : mUfs.get().getDirectoryStatus(ufsPath)); + return transformStatus(mUfs.get().isFile(ufsPath) ? mUfs.get().getFileStatus(ufsPath, + GetFileStatusOptions.defaults() + .setIncludeRealContentHash(options.getIncludeRealContentHash())) : + mUfs.get().getDirectoryStatus(ufsPath)); }); } @@ -392,6 +406,46 @@ public void needsSync(AlluxioURI path) throws IOException, AlluxioException { throw new UnsupportedOperationException(); } + @Override + public Optional submitJob(JobRequest jobRequest) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean stopJob(JobDescription jobDescription) { + throw new UnsupportedOperationException(); + } + + @Override + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { + throw new UnsupportedOperationException(); + } + + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + throw new UnsupportedOperationException(); + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + throw new UnsupportedOperationException(); + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) + throws FileDoesNotExistException, IOException, AlluxioException { + throw new UnsupportedOperationException(); + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws IOException, AlluxioException { + throw new UnsupportedOperationException(); + } + /** * Transform UFS file/directory status to client-side status. 
* @@ -419,7 +473,11 @@ private URIStatus transformStatus(UfsStatus ufsStatus) { UfsFileStatus fileStatus = (UfsFileStatus) ufsStatus; info.setLength(fileStatus.getContentLength()); info.setBlockSizeBytes(fileStatus.getBlockSize()); - } else { + info.setUfsFingerprint( + Fingerprint.create(mUfs.get().getUnderFSType(), ufsStatus, fileStatus.getContentHash()) + .serialize()); + } + else { info.setLength(0); } return new URIStatus(info); diff --git a/core/client/fs/src/main/java/alluxio/client/meta/MetaMasterClient.java b/core/client/fs/src/main/java/alluxio/client/meta/MetaMasterClient.java index 02e4b7a5eca7..073048d86c5f 100644 --- a/core/client/fs/src/main/java/alluxio/client/meta/MetaMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/meta/MetaMasterClient.java @@ -15,10 +15,12 @@ import alluxio.grpc.BackupPRequest; import alluxio.grpc.MasterInfo; import alluxio.grpc.MasterInfoField; +import alluxio.grpc.ProxyStatus; import alluxio.wire.BackupStatus; import alluxio.wire.ConfigCheckReport; import java.io.IOException; +import java.util.List; import java.util.Set; import java.util.UUID; @@ -71,4 +73,11 @@ public interface MetaMasterClient extends Client { * @return the hostname of the master that did the checkpoint */ String checkpoint() throws IOException; + + /** + * Lists information of all known proxy instances from the primary master. + * + * @return the list of proxy status + */ + List listProxyStatus() throws IOException; } diff --git a/core/client/fs/src/main/java/alluxio/client/meta/RetryHandlingMetaMasterClient.java b/core/client/fs/src/main/java/alluxio/client/meta/RetryHandlingMetaMasterClient.java index 63e44aa2b38c..875074b76281 100644 --- a/core/client/fs/src/main/java/alluxio/client/meta/RetryHandlingMetaMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/meta/RetryHandlingMetaMasterClient.java @@ -18,9 +18,11 @@ import alluxio.grpc.CheckpointPOptions; import alluxio.grpc.GetConfigReportPOptions; import alluxio.grpc.GetMasterInfoPOptions; +import alluxio.grpc.ListProxyStatusPRequest; import alluxio.grpc.MasterInfo; import alluxio.grpc.MasterInfoField; import alluxio.grpc.MetaMasterClientServiceGrpc; +import alluxio.grpc.ProxyStatus; import alluxio.grpc.ServiceType; import alluxio.master.MasterClientContext; import alluxio.wire.BackupStatus; @@ -30,6 +32,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.List; import java.util.Set; import java.util.UUID; import javax.annotation.concurrent.ThreadSafe; @@ -106,4 +109,11 @@ public String checkpoint() throws IOException { .checkpoint(CheckpointPOptions.newBuilder().build()).getMasterHostname(), RPC_LOG, "Checkpoint", ""); } + + @Override + public List listProxyStatus() throws IOException { + return retryRPC(() -> mClient.listProxyStatus( + ListProxyStatusPRequest.getDefaultInstance()).getProxyStatusesList(), + RPC_LOG, "ListProxyStatus", ""); + } } diff --git a/core/client/fs/src/main/java/alluxio/util/FileSystemOptionsUtils.java b/core/client/fs/src/main/java/alluxio/util/FileSystemOptionsUtils.java index 9480d88ca004..c3cc86de9b28 100644 --- a/core/client/fs/src/main/java/alluxio/util/FileSystemOptionsUtils.java +++ b/core/client/fs/src/main/java/alluxio/util/FileSystemOptionsUtils.java @@ -35,6 +35,7 @@ import alluxio.grpc.ScheduleAsyncPersistencePOptions; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataPOptions; import alluxio.grpc.TtlAction; import alluxio.grpc.UnmountPOptions; import 
alluxio.security.authorization.Mode; @@ -169,6 +170,14 @@ public static ExistsPOptions existsDefaults(AlluxioConfiguration conf) { .build(); } + /** + * @param conf Alluxio configuration + * @return options based on the configuration + */ + public static SyncMetadataPOptions syncMetadataDefaults(AlluxioConfiguration conf) { + return SyncMetadataPOptions.newBuilder().build(); + } + /** * @param conf Alluxio configuration * @return options based on the configuration @@ -185,12 +194,14 @@ public static FileSystemMasterCommonPOptions commonDefaults(AlluxioConfiguration public static FileSystemMasterCommonPOptions commonDefaults(AlluxioConfiguration conf, boolean withOpId) { FileSystemMasterCommonPOptions.Builder builder = FileSystemMasterCommonPOptions.newBuilder() - .setSyncIntervalMs(conf.getMs(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL)) .setTtl(conf.getMs(PropertyKey.USER_FILE_CREATE_TTL)) .setTtlAction(conf.getEnum(PropertyKey.USER_FILE_CREATE_TTL_ACTION, TtlAction.class)); if (withOpId && conf.getBoolean(PropertyKey.USER_FILE_INCLUDE_OPERATION_ID)) { builder.setOperationId(new OperationId(UUID.randomUUID()).toFsProto()); } + if (conf.isSetByUser(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL)) { + builder.setSyncIntervalMs(conf.getMs(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL)); + } return builder.build(); } @@ -342,10 +353,12 @@ public static SetAttributePOptions setAttributeClientDefaults(AlluxioConfigurati // Specifically set and override *only* the metadata sync interval // Setting other attributes by default will make the server think the user is intentionally // setting the values. Most fields within SetAttributePOptions are set by inclusion + FileSystemMasterCommonPOptions.Builder builder = FileSystemMasterCommonPOptions.newBuilder(); + if (conf.isSetByUser(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL)) { + builder.setSyncIntervalMs(conf.getMs(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL)); + } return SetAttributePOptions.newBuilder() - .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() - .setSyncIntervalMs(conf.getMs(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL)) - .build()) + .setCommonOptions(builder.build()) .build(); } diff --git a/core/client/fs/src/test/java/alluxio/client/block/options/GetWorkerReportOptionsTest.java b/core/client/fs/src/test/java/alluxio/client/block/options/GetWorkerReportOptionsTest.java new file mode 100644 index 000000000000..2f19e60e93d6 --- /dev/null +++ b/core/client/fs/src/test/java/alluxio/client/block/options/GetWorkerReportOptionsTest.java @@ -0,0 +1,43 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.block.options; + +import alluxio.grpc.WorkerInfoField; +import alluxio.grpc.WorkerRange; + +import org.junit.Assert; +import org.junit.Test; + +public class GetWorkerReportOptionsTest { + /** + * Check whether WorkerInfoField class and WorkerInfoField in proto file have identical fields.
+ */ + @Test + public void identicalFieldsForWorkerInfoAndRange() { + for (GetWorkerReportOptions.WorkerInfoField field : + GetWorkerReportOptions.WorkerInfoField.values()) { + Assert.assertEquals(field, GetWorkerReportOptions + .WorkerInfoField.fromProto(field.toProto())); + } + for (GetWorkerReportOptions.WorkerRange range : GetWorkerReportOptions.WorkerRange.values()) { + Assert.assertEquals(range, GetWorkerReportOptions.WorkerRange.fromProto(range.toProto())); + } + + for (WorkerInfoField field : WorkerInfoField.values()) { + Assert.assertEquals(field, + GetWorkerReportOptions.WorkerInfoField.fromProto(field).toProto()); + } + for (WorkerRange range : WorkerRange.values()) { + Assert.assertEquals(range, GetWorkerReportOptions.WorkerRange.fromProto(range).toProto()); + } + } +} diff --git a/core/client/fs/src/test/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicyTest.java b/core/client/fs/src/test/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicyTest.java new file mode 100644 index 000000000000..cb422716a831 --- /dev/null +++ b/core/client/fs/src/test/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicyTest.java @@ -0,0 +1,316 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.block.policy; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import alluxio.client.block.BlockWorkerInfo; +import alluxio.client.block.policy.options.GetWorkerOptions; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.Configuration; +import alluxio.conf.InstancedConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.wire.BlockInfo; +import alluxio.wire.WorkerNetAddress; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ThreadLocalRandom; +import java.util.stream.Collectors; +import java.util.stream.LongStream; + +public class CapacityBasedDeterministicHashPolicyTest { + + private static final CapacityBasedDeterministicHashPolicy NO_SHARDING_POLICY; + private static final CapacityBasedDeterministicHashPolicy THREE_SHARDS_POLICY; + private static final AlluxioConfiguration NO_SHARDING_CONF; + private static final AlluxioConfiguration THREE_SHARDS_CONF; + + static { + InstancedConfiguration noShardingConf = Configuration.copyGlobal(); + noShardingConf.set( + PropertyKey.USER_UFS_BLOCK_READ_LOCATION_POLICY_DETERMINISTIC_HASH_SHARDS, 1); + NO_SHARDING_CONF = noShardingConf; + InstancedConfiguration threeShardsConf = Configuration.copyGlobal(); + threeShardsConf.set( + PropertyKey.USER_UFS_BLOCK_READ_LOCATION_POLICY_DETERMINISTIC_HASH_SHARDS, 3); + THREE_SHARDS_CONF = threeShardsConf; + 
NO_SHARDING_POLICY = new CapacityBasedDeterministicHashPolicy(NO_SHARDING_CONF); + THREE_SHARDS_POLICY = new CapacityBasedDeterministicHashPolicy(THREE_SHARDS_CONF); + } + + @Test + public void basic() { + class TestPolicy extends CapacityBasedDeterministicHashPolicy { + public TestPolicy(AlluxioConfiguration conf) { + super(conf); + } + + @Override + protected long hashBlockId(long blockId) { + return blockId; + } + + @Override + protected BlockWorkerInfo getRandomCandidate(List candidates) { + // always pick the last candidate + Preconditions.checkArgument(candidates.size() >= 1); + return candidates.get(candidates.size() - 1); + } + } + + TestPolicy policy = new TestPolicy(NO_SHARDING_CONF); + + // total capacity: 100 + List blockWorkerInfos = ImmutableList.of( + new BlockWorkerInfo(new WorkerNetAddress().setHost("0"), 10, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("1"), 20, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("2"), 20, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("3"), 0, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("4"), 50, 0) + ); + BlockInfo blockInfo = new BlockInfo(); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockWorkerInfos(blockWorkerInfos) + .setBlockInfo(blockInfo); + + blockInfo.setBlockId(1); + assertEquals("0", policy.getWorker(options).get().getHost()); + blockInfo.setBlockId(5); + assertEquals("0", policy.getWorker(options).get().getHost()); + blockInfo.setBlockId(10); + assertEquals("1", policy.getWorker(options).get().getHost()); + blockInfo.setBlockId(30); + assertEquals("2", policy.getWorker(options).get().getHost()); + blockInfo.setBlockId(50); + assertEquals("4", policy.getWorker(options).get().getHost()); + } + + @Test + public void sharding() { + class TestPolicy extends CapacityBasedDeterministicHashPolicy { + private final long mTotalCapacity; + + public TestPolicy(AlluxioConfiguration conf, long totalCapacity) { + super(conf); + mTotalCapacity = totalCapacity; + } + + @Override + protected long hashBlockId(long blockId) { + // this simulates a hash function that generates a hash value that is either + // the block id itself, or its complement against total capacity + return mTotalCapacity - blockId; + } + + @Override + protected BlockWorkerInfo getRandomCandidate(List candidates) { + // always pick the last candidate + Preconditions.checkArgument(candidates.size() >= 1); + return candidates.get(candidates.size() - 1); + } + } + + // total capacity: 100 + List blockWorkerInfos = ImmutableList.of( + new BlockWorkerInfo(new WorkerNetAddress().setHost("0"), 10, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("1"), 20, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("2"), 20, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("3"), 0, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("4"), 50, 0) + ); + BlockInfo blockInfo = new BlockInfo(); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockWorkerInfos(blockWorkerInfos) + .setBlockInfo(blockInfo); + + InstancedConfiguration shard4Conf = Configuration.copyGlobal(); + shard4Conf + .set(PropertyKey.USER_UFS_BLOCK_READ_LOCATION_POLICY_DETERMINISTIC_HASH_SHARDS, 4); + TestPolicy policyShard4 = new TestPolicy(shard4Conf, 100); + TestPolicy policyShard3 = new TestPolicy(THREE_SHARDS_CONF, 100); + + // for 3 shards policy, the block ids are hashed 3 times, + // therefore the effective hash value is the block id's complement + // for 4 shards policy, the hash value is the same as the block id + 
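The comments above describe the simulated hash; the policy under test is capacity-weighted deterministic selection: hash the block id into [0, totalCapacity) and pick the worker whose cumulative capacity range contains that point, so selection probability is proportional to capacity and zero-capacity workers (host "3" in these tests) are never eligible. A sketch of that technique under the cumulative-range assumption (the real class may differ in details):

```java
import java.util.List;
import java.util.Optional;
import java.util.TreeMap;

// Hypothetical capacity-weighted deterministic selection:
// map hash(blockId) mod totalCapacity into cumulative capacity ranges.
public final class CapacityWeightedChooser {
  public static Optional<String> choose(List<String> hosts, List<Long> capacities,
      long blockId) {
    TreeMap<Long, String> ranges = new TreeMap<>();
    long total = 0;
    for (int i = 0; i < hosts.size(); i++) {
      long capacity = capacities.get(i);
      if (capacity > 0) {        // zero-capacity workers get no range at all
        ranges.put(total, hosts.get(i));
        total += capacity;
      }
    }
    if (total == 0) {
      return Optional.empty();   // no usable worker
    }
    long point = Math.floorMod(blockId, total); // stand-in for hashBlockId
    return Optional.of(ranges.floorEntry(point).getValue());
  }
}
```

With an identity hash this reproduces the basic test's expectations above: ids 1 and 5 fall in host "0"'s range [0, 10), id 10 in "1"'s [10, 30), id 30 in "2"'s [30, 50), and id 50 in "4"'s [50, 100).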
blockInfo.setBlockId(1); + assertEquals("4", policyShard3.getWorker(options).get().getHost()); + assertEquals("0", policyShard4.getWorker(options).get().getHost()); + blockInfo.setBlockId(5); + assertEquals("4", policyShard3.getWorker(options).get().getHost()); + assertEquals("0", policyShard4.getWorker(options).get().getHost()); + blockInfo.setBlockId(10); + assertEquals("4", policyShard3.getWorker(options).get().getHost()); + assertEquals("1", policyShard4.getWorker(options).get().getHost()); + blockInfo.setBlockId(60); + assertEquals("2", policyShard3.getWorker(options).get().getHost()); + assertEquals("4", policyShard4.getWorker(options).get().getHost()); + blockInfo.setBlockId(90); + assertEquals("1", policyShard3.getWorker(options).get().getHost()); + assertEquals("4", policyShard4.getWorker(options).get().getHost()); + } + + /** + * Tests that the probability a worker is chosen is linear to its normalized capacity, + * provided uniform block requests distribution. + */ + @Test + public void linearDistribution() { + final long capacityUpperBound = 1000; + final int numWorkers = 100; + final int numTrials = 100000; + final List capacities = + LongStream.generate(() -> ThreadLocalRandom.current().nextLong(capacityUpperBound)) + .limit(numWorkers).boxed().collect(Collectors.toList()); + final long totalCapacity = capacities.stream().reduce(0L, Long::sum); + + ImmutableMap.Builder workersBuilder = ImmutableMap.builder(); + for (int i = 0; i < numWorkers; i++) { + // used bytes shouldn't matter in case of CapacityBasedDeterministicHashPolicy; + // random number does not affect the outcome of the policy + long randomUsedBytes = ThreadLocalRandom.current().nextLong(); + WorkerNetAddress addr = new WorkerNetAddress().setHost(String.valueOf(i)); + BlockWorkerInfo workerInfo = new BlockWorkerInfo(addr, capacities.get(i), randomUsedBytes); + workersBuilder.put(addr, workerInfo); + } + Map workers = workersBuilder.build(); + + BlockInfo blockInfo = new BlockInfo(); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockInfo(blockInfo) + .setBlockWorkerInfos(ImmutableList.copyOf(workers.values())); + // worker to number of hits map + Map hits = new HashMap<>(); + for (int i = 0; i < numTrials; i++) { + // assume uniform block distribution + blockInfo.setBlockId(ThreadLocalRandom.current().nextLong()); + Optional chosen = THREE_SHARDS_POLICY.getWorker(options); + assertTrue(chosen.isPresent()); + hits.computeIfPresent(chosen.get(), (k, v) -> v + 1); + hits.putIfAbsent(chosen.get(), 1L); + } + // the chance that workers of a particular capacity are chosen converges to + // the ratio of their capacity over total capacity, as the number of trials increases + final double confidence = 0.01; + for (Map.Entry entry : hits.entrySet()) { + long capacity = workers.get(entry.getKey()).getCapacityBytes(); + double normalizedCapacity = capacity * 1.0 / totalCapacity; + double normalizedHits = entry.getValue() * 1.0 / numTrials; + assertTrue(Math.abs(normalizedCapacity - normalizedHits) < confidence); + } + } + + /** + * Tests that the outcome of the policy is deterministic if sharding is turned off. 
+ */ + @Test + public void deterministicChoice() { + List<BlockWorkerInfo> workerInfos = generateBlockWorkerInfos(100, 1); + BlockInfo blockInfo = new BlockInfo().setBlockId(1); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockInfo(blockInfo) + .setBlockWorkerInfos(workerInfos); + WorkerNetAddress chosen = NO_SHARDING_POLICY.getWorker(options).get(); + for (int i = 0; i < 10000; i++) { + Optional<WorkerNetAddress> workerInfo = NO_SHARDING_POLICY.getWorker(options); + assertTrue(workerInfo.isPresent()); + assertEquals(chosen, workerInfo.get()); + } + } + + /** + * Tests that when sharding is enabled (shards > 1), the upper bound of the number of all + * possibly selected workers is the configured shards value. + * + * Note: the lower bound is 1. + */ + @Test + public void numShardsDoesNotExceedConfiguredValue() { + List<BlockWorkerInfo> workerInfos = generateBlockWorkerInfos(100, 1); + BlockInfo blockInfo = new BlockInfo().setBlockId(1); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockInfo(blockInfo) + .setBlockWorkerInfos(workerInfos); + for (int numShards = 1; numShards < 20; numShards++) { + InstancedConfiguration conf = Configuration.copyGlobal(); + conf.set(PropertyKey.USER_UFS_BLOCK_READ_LOCATION_POLICY_DETERMINISTIC_HASH_SHARDS, + numShards); + CapacityBasedDeterministicHashPolicy policy = new CapacityBasedDeterministicHashPolicy(conf); + Set<WorkerNetAddress> seenWorkers = new HashSet<>(); + for (int i = 0; i < 1000; i++) { + Optional<WorkerNetAddress> workerInfo = policy.getWorker(options); + assertTrue(workerInfo.isPresent()); + seenWorkers.add(workerInfo.get()); + } + assertTrue(seenWorkers.size() <= numShards); + } + } + + @Test + public void zeroCapacityWorker() { + List<BlockWorkerInfo> workerInfos = generateBlockWorkerInfos(10, 0); + BlockInfo blockInfo = new BlockInfo().setBlockId(1); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockInfo(blockInfo) + .setBlockWorkerInfos(workerInfos); + assertFalse(NO_SHARDING_POLICY.getWorker(options).isPresent()); + } + + /** + * Tests that two workers with the same capacity have a well-defined order, independent of the + * order they are present in the worker list. + */ + @Test + public void stability() { + List<BlockWorkerInfo> workerInfos = new ArrayList<>(generateBlockWorkerInfos(10, 100)); + BlockInfo blockInfo = new BlockInfo().setBlockId(1); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockInfo(blockInfo) + .setBlockWorkerInfos(workerInfos); + assertTrue(NO_SHARDING_POLICY.getWorker(options).isPresent()); + WorkerNetAddress chosen = NO_SHARDING_POLICY.getWorker(options).get(); + for (int i = 0; i < 100; i++) { + Collections.shuffle(workerInfos); + assertTrue(NO_SHARDING_POLICY.getWorker(options).isPresent()); + assertEquals(chosen, NO_SHARDING_POLICY.getWorker(options).get()); + } + } + + /** + * Generates a list of workers with the same capacity, using each worker's index as its hostname.
+ */ + private List generateBlockWorkerInfos(int numWorkers, int capacity) { + ImmutableList.Builder workerInfoBuilder = ImmutableList.builder(); + for (int i = 0; i < numWorkers; i++) { + // used bytes shouldn't matter in case of CapacityBasedDeterministicHashPolicy; + // random number does not affect the outcome of the policy + long randomUsedBytes = ThreadLocalRandom.current().nextLong(); + WorkerNetAddress addr = new WorkerNetAddress().setHost(String.valueOf(i)); + BlockWorkerInfo workerInfo = new BlockWorkerInfo(addr, capacity, randomUsedBytes); + workerInfoBuilder.add(workerInfo); + } + return workerInfoBuilder.build(); + } +} diff --git a/core/client/fs/src/test/java/alluxio/client/block/stream/TestBlockInStream.java b/core/client/fs/src/test/java/alluxio/client/block/stream/TestBlockInStream.java index 842f532ffca2..fd6f756525cc 100644 --- a/core/client/fs/src/test/java/alluxio/client/block/stream/TestBlockInStream.java +++ b/core/client/fs/src/test/java/alluxio/client/block/stream/TestBlockInStream.java @@ -11,8 +11,11 @@ package alluxio.client.block.stream; +import alluxio.network.protocol.databuffer.DataBuffer; import alluxio.wire.WorkerNetAddress; +import com.google.common.annotations.VisibleForTesting; + import java.io.IOException; import java.nio.ByteBuffer; @@ -56,6 +59,16 @@ public boolean isClosed() { return mClosed; } + @VisibleForTesting + public DataReader getDataReader() { + return mDataReader; + } + + @VisibleForTesting + public DataBuffer getCurrentChunk() { + return mCurrentChunk; + } + @Override public void close() throws IOException { mClosed = true; diff --git a/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataReader.java b/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataReader.java index b7dc2cae0b35..ad36d2a6c4ac 100644 --- a/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataReader.java +++ b/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataReader.java @@ -14,6 +14,8 @@ import alluxio.network.protocol.databuffer.DataBuffer; import alluxio.network.protocol.databuffer.NioDataBuffer; +import com.google.common.base.Preconditions; + import java.io.IOException; import java.nio.ByteBuffer; import javax.annotation.Nullable; @@ -38,6 +40,7 @@ public TestDataReader(byte[] data, long chunkSize, long offset, long length) { @Override @Nullable public DataBuffer readChunk() { + Preconditions.checkState(!mClosed, "reader is closed"); if (mPos >= mEnd || mPos >= mData.length) { return null; } diff --git a/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataWriter.java b/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataWriter.java index d9917c1364e0..e83690db2119 100644 --- a/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataWriter.java +++ b/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataWriter.java @@ -15,6 +15,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Optional; /** * A {@link DataWriter} which writes data to a bytebuffer. 
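The getDataReader and getCurrentChunk accessors added to TestBlockInStream above exist so the unbuffer tests added further below can assert that unbuffer() released both the buffered chunk and the data reader while leaving the stream usable at its current position. A sketch of that contract with hypothetical types, not Alluxio's stream classes:

```java
import java.io.Closeable;
import java.io.IOException;

// Hypothetical illustration of the unbuffer() contract: drop buffered
// resources without invalidating the stream position.
abstract class BufferedBlockStream implements Closeable {
  protected Closeable mDataReader;   // open connection / reader, if any
  protected byte[] mCurrentChunk;    // buffered data, if any
  protected long mPos;               // logical position, preserved by unbuffer

  public void unbuffer() {
    mCurrentChunk = null;            // tests assert getCurrentChunk() == null
    closeDataReader();               // tests assert getDataReader() == null
  }

  private void closeDataReader() {
    if (mDataReader != null) {
      try {
        mDataReader.close();
      } catch (IOException e) {
        // best-effort release; a subsequent read re-opens the reader lazily
      }
      mDataReader = null;
    }
  }
}
```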
@@ -26,6 +27,11 @@ public TestDataWriter(ByteBuffer buffer) { mBuffer = buffer; } + @Override + public Optional getUfsContentHash() { + return Optional.empty(); + } + @Override public void writeChunk(ByteBuf chunk) throws IOException { try { diff --git a/core/client/fs/src/test/java/alluxio/client/file/AlluxioFileInStreamTest.java b/core/client/fs/src/test/java/alluxio/client/file/AlluxioFileInStreamTest.java index b77f28d10ed6..04271a1d8d16 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/AlluxioFileInStreamTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/AlluxioFileInStreamTest.java @@ -13,6 +13,7 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.junit.Assume.assumeTrue; @@ -826,6 +827,109 @@ public void triggerAsyncOnClose() throws Exception { assertTrue(mTestStream.triggerAsyncCaching(mInStreams.get(mInStreams.size() - 1))); } + @Test + public void unbufferAroundRead() throws Exception { + int bufferSize = (int) (mFileSize / 2); + byte[] buffer = new byte[bufferSize]; + mTestStream.read(buffer); + assertArrayEquals(BufferUtils.getIncreasingByteArray(bufferSize), buffer); + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals(BufferUtils.getIncreasingByteArray(bufferSize, bufferSize), buffer); + unbuffer(); + } + + @Test + public void unbufferAroundPositionRead() throws Exception { + int bufferSize = (int) (mFileSize / 4); + byte[] buffer = new byte[bufferSize]; + mTestStream.positionedRead(bufferSize, buffer, 0, bufferSize); + assertArrayEquals(BufferUtils.getIncreasingByteArray(bufferSize, bufferSize), buffer); + unbuffer(); + mTestStream.positionedRead(bufferSize, buffer, 0, bufferSize); + assertArrayEquals(BufferUtils.getIncreasingByteArray(bufferSize, bufferSize), buffer); + unbuffer(); + } + + @Test + public void unbufferAroundSeek() throws Exception { + int bufferSize = (int) (mFileSize / 8); + int seekSize = (int) (mFileSize / 8); + byte[] buffer = new byte[bufferSize]; + unbuffer(); + mTestStream.seek(seekSize); + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals(BufferUtils.getIncreasingByteArray(seekSize, bufferSize), buffer); + } + + @Test + public void unbufferAroundSkip() throws Exception { + int bufferSize = (int) (mFileSize / 8); + int skipSize = (int) (mFileSize / 8); + byte[] buffer = new byte[bufferSize]; + unbuffer(); + mTestStream.read(buffer); + unbuffer(); + mTestStream.skip(skipSize); + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals(BufferUtils.getIncreasingByteArray(skipSize + bufferSize, bufferSize), + buffer); + } + + @Test + public void unbufferOnClosedFile() throws Exception { + mTestStream.close(); + unbuffer(); + } + + @Test + public void multipleUnbuffers() throws Exception { + byte[] buffer = new byte[(int) (mFileSize / 2)]; + unbuffer(); + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals(BufferUtils.getIncreasingByteArray((int) (mFileSize / 2)), buffer); + unbuffer(); + unbuffer(); + } + + @Test + public void unbufferMultipleReads() throws IOException { + int bufferSize = (int) (mFileSize / 8); + byte[] buffer = new byte[bufferSize]; + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals(BufferUtils.getIncreasingByteArray(bufferSize), buffer); + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals( + BufferUtils.getIncreasingByteArray(bufferSize, bufferSize), buffer); + 
mTestStream.read(buffer); + assertArrayEquals( + BufferUtils.getIncreasingByteArray(bufferSize * 2, bufferSize), buffer); + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals( + BufferUtils.getIncreasingByteArray(bufferSize * 3, bufferSize), buffer); + mTestStream.read(buffer); + assertArrayEquals( + BufferUtils.getIncreasingByteArray(bufferSize * 4, bufferSize), buffer); + mTestStream.read(buffer); + assertArrayEquals( + BufferUtils.getIncreasingByteArray(bufferSize * 5, bufferSize), buffer); + unbuffer(); + } + + private void unbuffer() { + mTestStream.unbuffer(); + for (TestBlockInStream stream : mInStreams) { + assertNull(stream.getCurrentChunk()); + assertNull(stream.getDataReader()); + } + } + /** * Tests that reading dataRead bytes into a buffer will properly write those bytes to the cache * streams and that the correct bytes are read from the {@link FileInStream}. diff --git a/core/client/fs/src/test/java/alluxio/client/file/BaseFileSystemTest.java b/core/client/fs/src/test/java/alluxio/client/file/BaseFileSystemTest.java index 44e3a51a3e66..fa8efe0a2f4e 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/BaseFileSystemTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/BaseFileSystemTest.java @@ -24,10 +24,6 @@ import static org.mockito.Mockito.when; import alluxio.AlluxioURI; -import alluxio.ClientContext; -import alluxio.TestLoggerRule; -import alluxio.conf.Configuration; -import alluxio.conf.InstancedConfiguration; import alluxio.conf.PropertyKey; import alluxio.grpc.Bits; import alluxio.grpc.CreateDirectoryPOptions; @@ -41,16 +37,11 @@ import alluxio.grpc.RenamePOptions; import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.UnmountPOptions; -import alluxio.resource.CloseableResource; import alluxio.util.FileSystemOptionsUtils; import alluxio.wire.FileInfo; -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.powermock.api.mockito.PowerMockito; import org.powermock.core.classloader.annotations.PrepareForTest; import org.powermock.modules.junit4.PowerMockRunner; @@ -62,63 +53,7 @@ */ @RunWith(PowerMockRunner.class) @PrepareForTest({FileSystemContext.class, FileSystemMasterClient.class}) -public final class BaseFileSystemTest { - - private static final RuntimeException EXCEPTION = new RuntimeException("test exception"); - private static final String SHOULD_HAVE_PROPAGATED_MESSAGE = - "Exception should have been propagated"; - - private InstancedConfiguration mConf = Configuration.copyGlobal(); - - @Rule - private TestLoggerRule mTestLogger = new TestLoggerRule(); - - private FileSystem mFileSystem; - private FileSystemContext mFileContext; - private ClientContext mClientContext; - private FileSystemMasterClient mFileSystemMasterClient; - - private class DummyAlluxioFileSystem extends BaseFileSystem { - public DummyAlluxioFileSystem(FileSystemContext fsContext) { - super(fsContext); - } - } - - /** - * Sets up the file system and the context before a test runs. - */ - @Before - public void before() { - mConf.set(PropertyKey.USER_FILE_INCLUDE_OPERATION_ID, false); - mClientContext = ClientContext.create(mConf); - mFileContext = PowerMockito.mock(FileSystemContext.class); - mFileSystemMasterClient = PowerMockito.mock(FileSystemMasterClient.class); - when(mFileContext.acquireMasterClientResource()).thenReturn( - new CloseableResource(mFileSystemMasterClient) { - @Override - public void closeResource() { - // Noop. 
- } - }); - when(mFileContext.getClientContext()).thenReturn(mClientContext); - when(mFileContext.getClusterConf()).thenReturn(mConf); - when(mFileContext.getPathConf(any())).thenReturn(mConf); - when(mFileContext.getUriValidationEnabled()).thenReturn(true); - mFileSystem = new DummyAlluxioFileSystem(mFileContext); - } - - @After - public void after() { - mConf = Configuration.copyGlobal(); - } - - /** - * Verifies and releases the master client after a test with a filesystem operation. - */ - public void verifyFilesystemContextAcquiredAndReleased() { - verify(mFileContext).acquireMasterClientResource(); - } - +public final class BaseFileSystemTest extends FileSystemTestBase { /** * Tests the creation of a file via the * {@link BaseFileSystem#createFile(AlluxioURI, CreateFilePOptions)} method. diff --git a/core/client/fs/src/test/java/alluxio/client/file/FileSystemTestBase.java b/core/client/fs/src/test/java/alluxio/client/file/FileSystemTestBase.java new file mode 100644 index 000000000000..1c1c1ae3de41 --- /dev/null +++ b/core/client/fs/src/test/java/alluxio/client/file/FileSystemTestBase.java @@ -0,0 +1,89 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.file; + +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import alluxio.ClientContext; +import alluxio.TestLoggerRule; +import alluxio.conf.Configuration; +import alluxio.conf.InstancedConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.resource.CloseableResource; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.powermock.api.mockito.PowerMockito; + +/** + * Test base for {@link FileSystem} related test. + */ +public class FileSystemTestBase { + + protected static final RuntimeException EXCEPTION = new RuntimeException("test exception"); + protected static final String SHOULD_HAVE_PROPAGATED_MESSAGE = + "Exception should have been propagated"; + + protected InstancedConfiguration mConf = Configuration.copyGlobal(); + + @Rule + protected TestLoggerRule mTestLogger = new TestLoggerRule(); + + protected FileSystem mFileSystem; + protected FileSystemContext mFileContext; + protected ClientContext mClientContext; + protected FileSystemMasterClient mFileSystemMasterClient; + + private class DummyAlluxioFileSystem extends BaseFileSystem { + public DummyAlluxioFileSystem(FileSystemContext fsContext) { + super(fsContext); + } + } + + /** + * Sets up the file system and the context before a test runs. + */ + @Before + public void before() { + mConf.set(PropertyKey.USER_FILE_INCLUDE_OPERATION_ID, false); + mClientContext = ClientContext.create(mConf); + mFileContext = PowerMockito.mock(FileSystemContext.class); + mFileSystemMasterClient = PowerMockito.mock(FileSystemMasterClient.class); + when(mFileContext.acquireMasterClientResource()).thenReturn( + new CloseableResource(mFileSystemMasterClient) { + @Override + public void closeResource() { + // Noop. 
+ } + }); + when(mFileContext.getClientContext()).thenReturn(mClientContext); + when(mFileContext.getClusterConf()).thenReturn(mConf); + when(mFileContext.getPathConf(any())).thenReturn(mConf); + when(mFileContext.getUriValidationEnabled()).thenReturn(true); + mFileSystem = new DummyAlluxioFileSystem(mFileContext); + } + + @After + public void after() { + mConf = Configuration.copyGlobal(); + } + + /** + * Verifies and releases the master client after a test with a filesystem operation. + */ + public void verifyFilesystemContextAcquiredAndReleased() { + verify(mFileContext).acquireMasterClientResource(); + } +} diff --git a/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java b/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java index 149a1bcd4619..67642b512829 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java +++ b/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java @@ -14,6 +14,7 @@ import alluxio.AlluxioURI; import alluxio.exception.status.AlluxioStatusException; import alluxio.exception.status.UnavailableException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CheckConsistencyPOptions; import alluxio.grpc.CompleteFilePOptions; @@ -23,6 +24,8 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.MountPOptions; @@ -31,7 +34,12 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UpdateUfsModePOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.security.authorization.AclEntry; import alluxio.wire.MountPointInfo; import alluxio.wire.SyncPointInfo; @@ -42,6 +50,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.Consumer; /** @@ -234,4 +243,43 @@ public void close() throws IOException { @Override public void needsSync(AlluxioURI path) throws AlluxioStatusException { } + + @Override + public Optional submitJob(JobRequest job) { + return Optional.empty(); + } + + @Override + public boolean stopJob(JobDescription jobDescription) { + return false; + } + + @Override + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { + return null; + } + + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException { + return null; + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException { + return null; + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) throws AlluxioStatusException { + return null; + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws AlluxioStatusException { + return null; + } } diff --git a/core/client/fs/src/test/java/alluxio/client/file/cache/HangingPageStore.java 
b/core/client/fs/src/test/java/alluxio/client/file/cache/HangingPageStore.java index b8cf2f239544..fde5fafbc3c2 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/cache/HangingPageStore.java +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/HangingPageStore.java @@ -20,6 +20,7 @@ import java.nio.ByteBuffer; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; /** * A PageStore that can hang on put, get or delete. @@ -29,6 +30,7 @@ class HangingPageStore extends LocalPageStore { private AtomicBoolean mGetHanging = new AtomicBoolean(false); private AtomicBoolean mPutHanging = new AtomicBoolean(false); private AtomicInteger mPut = new AtomicInteger(0); + private AtomicLong mStopHangingThread = new AtomicLong(-1); public HangingPageStore(PageStoreOptions options) { super(options); @@ -45,6 +47,7 @@ public void delete(PageId pageId) throws IOException, PageNotFoundException { public int get(PageId pageId, int pageOffset, int bytesToRead, PageReadTargetBuffer target, boolean isTemporary) throws IOException, PageNotFoundException { + checkStopHanging(); // never quit while (mGetHanging.get()) {} return super.get(pageId, pageOffset, bytesToRead, target, isTemporary); @@ -52,12 +55,20 @@ public int get(PageId pageId, int pageOffset, int bytesToRead, PageReadTargetBuf @Override public void put(PageId pageId, ByteBuffer page, boolean isTemporary) throws IOException { + checkStopHanging(); // never quit while (mPutHanging.get()) {} super.put(pageId, page, isTemporary); mPut.getAndIncrement(); } + private void checkStopHanging() { + if (mStopHangingThread.get() == Thread.currentThread().getId()) { + mPutHanging.set(false); + mGetHanging.set(false); + } + } + /** * @param value if delete operation hangs */ @@ -79,6 +90,15 @@ public void setPutHanging(boolean value) { mPutHanging.set(value); } + /** + * Sets a thread id so that if a thread with the given id reaches + * the point where it would hang, it disables hanging instead.
+ * @param id the thread id to stop the hanging + */ + public void setStopHangingThread(long id) { + mStopHangingThread.set(id); + } + /** * @return number of put operations */ diff --git a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java index 570f5110c50f..ab17f2a98a7d 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java @@ -32,6 +32,7 @@ import alluxio.exception.FileIncompleteException; import alluxio.exception.InvalidPathException; import alluxio.exception.OpenDirectoryException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; @@ -39,6 +40,8 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.MountPOptions; @@ -48,7 +51,12 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; import alluxio.security.authorization.AclEntry; @@ -81,6 +89,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Random; import java.util.concurrent.ThreadLocalRandom; import java.util.function.BiConsumer; @@ -522,6 +531,38 @@ protected Stopwatch createUnstartedStopwatch() { Assert.assertEquals(timeSource.get(StepTicker.Type.CACHE_MISS), timeReadExternal); } + @Test + public void testUnbuffer() throws Exception { + int fileSize = mPageSize; + byte[] testData = BufferUtils.getIncreasingByteArray(fileSize); + ByteArrayCacheManager manager = new ByteArrayCacheManager(); + LocalCacheFileInStream stream = setupWithSingleFile(testData, manager); + + int partialReadSize = fileSize / 5; + int offset = fileSize / 5; + + byte[] cacheMiss = new byte[partialReadSize]; + stream.unbuffer(); + stream.seek(offset); + stream.unbuffer(); + Assert.assertEquals(partialReadSize, stream.read(cacheMiss)); + stream.unbuffer(); + Assert.assertArrayEquals( + Arrays.copyOfRange(testData, offset, offset + partialReadSize), cacheMiss); + Assert.assertEquals(0, manager.mPagesServed); + Assert.assertEquals(1, manager.mPagesCached); + + byte[] cacheHit = new byte[partialReadSize]; + stream.unbuffer(); + stream.seek(offset); + stream.unbuffer(); + Assert.assertEquals(partialReadSize, stream.read(cacheHit)); + stream.unbuffer(); + Assert.assertArrayEquals( + Arrays.copyOfRange(testData, offset, offset + partialReadSize), cacheHit); + Assert.assertEquals(1, manager.mPagesServed); + } + private LocalCacheFileInStream setupWithSingleFile(byte[] data, CacheManager manager) throws Exception { Map files = new HashMap<>(); @@ -862,6 +903,47 @@ public void needsSync(AlluxioURI path) { throw new UnsupportedOperationException(); } + @Override + public Optional submitJob(JobRequest 
jobRequest) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean stopJob(JobDescription jobDescription) { + throw new UnsupportedOperationException(); + } + + @Override + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { + throw new UnsupportedOperationException(); + } + + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + return null; + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, + SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + return null; + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) + throws FileDoesNotExistException, IOException, AlluxioException { + return null; + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws IOException, AlluxioException { + return null; + } + @Override public void close() throws IOException { throw new UnsupportedOperationException(); diff --git a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheManagerTest.java b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheManagerTest.java index 4a7a70695ee8..f564d4035ed2 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheManagerTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheManagerTest.java @@ -46,6 +46,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.junit.After; +import org.junit.Assume; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -55,8 +57,10 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.file.Paths; +import java.util.HashSet; import java.util.List; import java.util.Optional; +import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -110,6 +114,11 @@ public void before() throws Exception { mCacheManager = createLocalCacheManager(); } + @After + public void after() throws Exception { + mCacheManager.close(); + } + private byte[] page(int i, int pageLen) { return BufferUtils.getIncreasingByteArray(i, pageLen); } @@ -165,13 +174,13 @@ public void createNonexistentRootDirAsyncRestore() throws Exception { } @Test - public void createUnwriableRootDirSyncRestore() throws Exception { + public void createUnwritableRootDirSyncRestore() throws Exception { File root = mTemp.newFolder(); mConf.set(PropertyKey.USER_CLIENT_CACHE_ASYNC_RESTORE_ENABLED, false); mConf.set(PropertyKey.USER_CLIENT_CACHE_DIRS, root.getAbsolutePath()); mCacheManagerOptions = CacheManagerOptions.create(mConf); + Assume.assumeTrue(root.setWritable(false)); try { - root.setWritable(false); mPageMetaStore = new DefaultPageMetaStore(PageStoreDir.createPageStoreDirs(mCacheManagerOptions)); LocalCacheManager.create(mCacheManagerOptions, mPageMetaStore); @@ -184,13 +193,13 @@ public void createUnwriableRootDirSyncRestore() throws Exception { } @Test - public void createUnwriableRootDirAsyncRestore() throws Exception { + public void createUnwritableRootDirAsyncRestore() throws Exception { File root = mTemp.newFolder(); mConf.set(PropertyKey.USER_CLIENT_CACHE_ASYNC_RESTORE_ENABLED, true); mConf.set(PropertyKey.USER_CLIENT_CACHE_DIRS, root.getAbsolutePath()); 
mCacheManagerOptions = CacheManagerOptions.create(mConf); + Assume.assumeTrue(root.setWritable(false)); try { - root.setWritable(false); mPageMetaStore = new DefaultPageMetaStore(PageStoreDir.createPageStoreDirs(mCacheManagerOptions)); mCacheManager = @@ -711,8 +720,8 @@ public void syncRestoreUnwritableRootDir() throws Exception { String rootDir = mPageStoreOptions.getRootDir().toString(); FileUtils.deletePathRecursively(rootDir); File rootParent = new File(rootDir).getParentFile(); + Assume.assumeTrue(rootParent.setWritable(false)); try { - rootParent.setWritable(false); mPageMetaStore = new DefaultPageMetaStore(ImmutableList.of(dir)); LocalCacheManager.create(mCacheManagerOptions, mPageMetaStore); } catch (Exception e) { @@ -734,7 +743,7 @@ public void asyncRestoreUnwritableRootDir() throws Exception { String rootDir = mPageStoreOptions.getRootDir().toString(); FileUtils.deletePathRecursively(rootDir); File rootParent = new File(rootDir).getParentFile(); - rootParent.setWritable(false); + Assume.assumeTrue(rootParent.setWritable(false)); try { mPageMetaStore = new DefaultPageMetaStore(ImmutableList.of(dir)); mCacheManager = LocalCacheManager.create(mCacheManagerOptions, mPageMetaStore); } catch (Exception e) { @@ -799,6 +808,7 @@ public void asyncRestoreWithMorePagesThanCapacity() throws Exception { @Test public void asyncCache() throws Exception { + // this must be smaller than the number of locks in the page store for the test to succeed final int threads = 16; mConf.set(PropertyKey.USER_CLIENT_CACHE_ASYNC_WRITE_ENABLED, true); mConf.set(PropertyKey.USER_CLIENT_CACHE_ASYNC_WRITE_THREADS, threads); @@ -812,14 +822,30 @@ public void asyncCache() throws Exception { pageStore.setPutHanging(true); mPageMetaStore = new DefaultPageMetaStore(ImmutableList.of(dir)); mCacheManager = createLocalCacheManager(mConf, mPageMetaStore); + Set<Integer> lockedPages = new HashSet<>(); for (int i = 0; i < threads; i++) { PageId pageId = new PageId("5", i); assertTrue(mCacheManager.put(pageId, page(i, PAGE_SIZE_BYTES))); + lockedPages.add(mCacheManager.getPageLockId(pageId)); } - pageStore.setPutHanging(false); - //fallback to caller's thread when queue is full - assertTrue(mCacheManager.put(PAGE_ID1, PAGE1)); - while (pageStore.getPuts() < threads) { + // by setting the following line, the hanging will only be stopped when the current + // thread adds a page + pageStore.setStopHangingThread(Thread.currentThread().getId()); + // fall back to the caller's thread (the current one here) when the queue is full + // find a page id that is not already locked + int pageLockId; + long nxtIdx = 0; + PageId callerPageId; + do { + callerPageId = new PageId("0L", nxtIdx); + pageLockId = mCacheManager.getPageLockId(callerPageId); + nxtIdx++; + } while (lockedPages.contains(pageLockId)); + // this page will be inserted by the current thread and not a worker thread + assertTrue(mCacheManager.put(callerPageId, PAGE1)); + // Wait for all tasks to complete + // one for each worker thread, and one on the main thread + while (pageStore.getPuts() < threads + 1) { Thread.sleep(1000); } pageStore.setPutHanging(true); @@ -827,6 +853,7 @@ PageId pageId = new PageId("6", i); assertTrue(mCacheManager.put(pageId, page(i, PAGE_SIZE_BYTES))); } + pageStore.setPutHanging(false); } @Test @@ -850,6 +877,7 @@ public void asyncCacheSamePage() throws Exception { } pageStore.setPutHanging(true); assertTrue(mCacheManager.put(PAGE_ID1, PAGE1)); + pageStore.setPutHanging(false); } @Test diff --git
a/core/client/fs/src/test/java/alluxio/client/file/cache/TwoChoiceRandomEvictorTest.java b/core/client/fs/src/test/java/alluxio/client/file/cache/TwoChoiceRandomEvictorTest.java new file mode 100644 index 000000000000..0e7686186164 --- /dev/null +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/TwoChoiceRandomEvictorTest.java @@ -0,0 +1,82 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.file.cache; + +import alluxio.client.file.cache.evictor.CacheEvictorOptions; +import alluxio.client.file.cache.evictor.TwoChoiceRandomEvictor; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests for the {@link TwoChoiceRandomEvictor} class. + */ +public class TwoChoiceRandomEvictorTest { + private TwoChoiceRandomEvictor mEvictor; + private final PageId mFirst = new PageId("1L", 2L); + private final PageId mSecond = new PageId("3L", 4L); + private final PageId mThird = new PageId("5L", 6L); + + /** + * Sets up the instances. + */ + @Before + public void before() { + mEvictor = new TwoChoiceRandomEvictor(new CacheEvictorOptions()); + } + + @Test + public void evictGetOrder() { + mEvictor.updateOnGet(mFirst); + Assert.assertEquals(mFirst, mEvictor.evict()); + mEvictor.updateOnGet(mSecond); + PageId evictedPage = mEvictor.evict(); + Assert.assertTrue(evictedPage.equals(mFirst) || evictedPage.equals(mSecond)); + } + + @Test + public void evictPutOrder() { + mEvictor.updateOnPut(mFirst); + Assert.assertEquals(mFirst, mEvictor.evict()); + mEvictor.updateOnPut(mSecond); + mEvictor.updateOnPut(mFirst); + PageId evictedPage = mEvictor.evict(); + Assert.assertTrue(evictedPage.equals(mFirst) || evictedPage.equals(mSecond)); + } + + @Test + public void evictAfterDelete() { + mEvictor.updateOnPut(mFirst); + mEvictor.updateOnPut(mSecond); + mEvictor.updateOnPut(mThird); + mEvictor.updateOnDelete(mSecond); + mEvictor.updateOnDelete(mThird); + Assert.assertEquals(mFirst, mEvictor.evict()); + } + + @Test + public void evictEmpty() { + Assert.assertNull(mEvictor.evict()); + } + + @Test + public void evictAllGone() { + mEvictor.updateOnPut(mFirst); + mEvictor.updateOnPut(mSecond); + mEvictor.updateOnPut(mThird); + mEvictor.updateOnDelete(mFirst); + mEvictor.updateOnDelete(mSecond); + mEvictor.updateOnDelete(mThird); + Assert.assertNull(mEvictor.evict()); + } +} diff --git a/core/client/fs/src/test/java/alluxio/client/file/options/OutStreamOptionsTest.java b/core/client/fs/src/test/java/alluxio/client/file/options/OutStreamOptionsTest.java index 65e27b5d43e2..a8726f5c2b64 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/options/OutStreamOptionsTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/options/OutStreamOptionsTest.java @@ -102,7 +102,7 @@ public void defaults() throws IOException { assertEquals(ModeUtils.applyFileUMask(Mode.defaults(), mConf.getString(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_UMASK)), options.getMode()); assertEquals(Constants.NO_TTL, options.getCommonOptions().getTtl()); - assertEquals(TtlAction.DELETE, 
options.getCommonOptions().getTtlAction()); + assertEquals(TtlAction.FREE, options.getCommonOptions().getTtlAction()); assertEquals(ufsType, options.getUnderStorageType()); assertEquals(WriteType.CACHE_THROUGH, options.getWriteType()); assertEquals(Constants.LAST_TIER, options.getWriteTier()); diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java b/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java index 4aeae0376deb..4589dd252673 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java @@ -31,10 +31,12 @@ import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.DeletePOptions; +import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.SetAttributePOptions; import alluxio.master.MasterInquireClient.Factory; import alluxio.security.CurrentUser; import alluxio.security.authorization.Mode; +import alluxio.util.ModeUtils; import alluxio.wire.BlockLocationInfo; import alluxio.wire.FileBlockInfo; import alluxio.wire.WorkerNetAddress; @@ -60,7 +62,6 @@ import java.security.AccessControlContext; import java.security.AccessController; import java.security.PrivilegedExceptionAction; -import java.text.MessageFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -88,6 +89,7 @@ public abstract class AbstractFileSystem extends org.apache.hadoop.fs.FileSystem private Path mWorkingDir = new Path(AlluxioURI.SEPARATOR); private Statistics mStatistics = null; private String mAlluxioHeader = null; + private boolean mExcludeMountInfoOnListStatus; /** * Constructs a new {@link AbstractFileSystem} instance with specified a {@link FileSystem} @@ -149,6 +151,27 @@ public void close() throws IOException { mFileSystem.close(); } + /** + * Attempts to create a file with default permission. + * Overwrite will not succeed if the path exists and is a folder. + * + * @param path path to create + * @param overwrite overwrite if file exists + * @param bufferSize the size in bytes of the buffer to be used + * @param replication under filesystem replication factor, this is ignored + * @param blockSize block size in bytes + * @param progress queryable progress + * @return an {@link FSDataOutputStream} created at the indicated path of a file + */ + @Override + public FSDataOutputStream create(Path path, boolean overwrite, int bufferSize, short replication, + long blockSize, Progressable progress) throws IOException { + String confUmask = mAlluxioConf.getString(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_UMASK); + Mode mode = ModeUtils.applyFileUMask(Mode.defaults(), confUmask); + return this.create(path, new FsPermission(mode.toShort()), overwrite, bufferSize, replication, + blockSize, progress); + } + /** * Attempts to create a file. Overwrite will not succeed if the path exists and is a folder. 
* @@ -172,29 +195,14 @@ public FSDataOutputStream create(Path path, FsPermission permission, boolean ove AlluxioURI uri = getAlluxioPath(path); CreateFilePOptions options = CreateFilePOptions.newBuilder().setBlockSizeBytes(blockSize) - .setMode(new Mode(permission.toShort()).toProto()).setRecursive(true).build(); + .setMode(new Mode(permission.toShort()).toProto()).setRecursive(true) + .setOverwrite(overwrite).build(); FileOutStream outStream; try { outStream = mFileSystem.createFile(uri, options); } catch (AlluxioException e) { - //now we should consider the override parameter - try { - if (mFileSystem.exists(uri)) { - if (!overwrite) { - throw new IOException( - "Not allowed to create() (overwrite=false) for existing Alluxio path: " + uri); - } - if (mFileSystem.getStatus(uri).isFolder()) { - throw new IOException(MessageFormat - .format("{0} already exists. Directories cannot be overwritten with create", uri)); - } - mFileSystem.delete(uri); - } - outStream = mFileSystem.createFile(uri, options); - } catch (AlluxioException e2) { - throw new IOException(e2); - } + throw new IOException(e); } return new FSDataOutputStream(outStream, mStatistics); } @@ -325,7 +333,7 @@ public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long l info.getBlockInfo().getLength())); } }); - BlockLocation[] ret = blockLocations.toArray(new BlockLocation[blockLocations.size()]); + BlockLocation[] ret = blockLocations.toArray(new BlockLocation[0]); if (LOG.isDebugEnabled()) { LOG.debug("getFileBlockLocations({}, {}, {}) returned {}", file.getPath().getName(), start, len, Arrays.toString(ret)); @@ -521,6 +529,8 @@ public synchronized void initialize(URI uri, org.apache.hadoop.conf.Configuratio // Creating a new instanced configuration from an AlluxioProperties object isn't expensive. mAlluxioConf = new InstancedConfiguration(alluxioProps); mAlluxioConf.validate(); + mExcludeMountInfoOnListStatus = mAlluxioConf.getBoolean( + PropertyKey.USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS); if (mFileSystem != null) { return; @@ -596,7 +606,9 @@ public FileStatus[] listStatus(Path path) throws IOException { AlluxioURI uri = getAlluxioPath(path); List statuses; try { - statuses = mFileSystem.listStatus(uri); + ListStatusPOptions listStatusPOptions = ListStatusPOptions.getDefaultInstance().toBuilder() + .setExcludeMountInfo(mExcludeMountInfoOnListStatus).build(); + statuses = mFileSystem.listStatus(uri, listStatusPOptions); } catch (FileDoesNotExistException e) { throw new FileNotFoundException(getAlluxioPath(path).toString()); } catch (AlluxioException e) { @@ -611,6 +623,20 @@ public FileStatus[] listStatus(Path path) throws IOException { return ret; } + /** + * Attempts to create a folder with the specified path with default permission. + * Parent directories will be created. + * + * @param path path to create + * @return true if the indicated folder is created successfully or already exists + */ + @Override + public boolean mkdirs(Path path) throws IOException { + String confUmask = mAlluxioConf.getString(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_UMASK); + Mode mode = ModeUtils.applyDirectoryUMask(Mode.defaults(), confUmask); + return mkdirs(path, new FsPermission(mode.toShort())); + } + /** * Attempts to create a folder with the specified path. Parent directories will be created. 
* diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/AlluxioHdfsInputStream.java b/core/client/hdfs/src/main/java/alluxio/hadoop/AlluxioHdfsInputStream.java index 581ae7e8de95..86cd96e7d10c 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/AlluxioHdfsInputStream.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/AlluxioHdfsInputStream.java @@ -14,6 +14,7 @@ import alluxio.client.file.FileInStream; import com.google.common.base.Preconditions; +import org.apache.hadoop.fs.ByteBufferReadable; import org.apache.hadoop.fs.FSDataInputStream; import java.io.IOException; @@ -50,7 +51,30 @@ public int read() throws IOException { @Override public int read(ByteBuffer buf) throws IOException { - return mInput.read(buf); + // @see FSDataInputStream.java + if (mInput.getWrappedStream() instanceof ByteBufferReadable) { + return mInput.read(buf); + } else { + int off = buf.position(); + int len = buf.remaining(); + final int totalBytesRead; + if (buf.hasArray()) { + byte[] byteArray = buf.array(); + totalBytesRead = read(byteArray, buf.arrayOffset() + off, len); + if (totalBytesRead > 0) { + buf.position(off + totalBytesRead); + } + } else { + byte[] byteArray = new byte[len]; + totalBytesRead = read(byteArray); + if (totalBytesRead > 0) { + buf.put(byteArray, 0, totalBytesRead); + } + } + return totalBytesRead; + } } @Override @@ -104,4 +128,9 @@ public int positionedRead(long position, byte[] buffer, int offset, int length) throws IOException { return mInput.read(position, buffer, offset, length); } + + @Override + public void unbuffer() { + mInput.unbuffer(); + } } diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/BaseHdfsFileInputStream.java b/core/client/hdfs/src/main/java/alluxio/hadoop/BaseHdfsFileInputStream.java new file mode 100644 index 000000000000..bfe1c1278c96 --- /dev/null +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/BaseHdfsFileInputStream.java @@ -0,0 +1,206 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.hadoop; + +import alluxio.AlluxioURI; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileSystem; +import alluxio.exception.AlluxioException; +import alluxio.exception.ExceptionMessage; +import alluxio.exception.FileDoesNotExistException; + +import org.apache.hadoop.fs.ByteBufferReadable; +import org.apache.hadoop.fs.FileSystem.Statistics; +import org.apache.hadoop.fs.PositionedReadable; +import org.apache.hadoop.fs.Seekable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.EOFException; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * An input stream for reading a file from HDFS. This is just a wrapper around + * {@link FileInStream} with additional statistics gathering in a {@link Statistics} object. 
+ */ +@NotThreadSafe +public class BaseHdfsFileInputStream extends InputStream implements Seekable, PositionedReadable, + ByteBufferReadable { + private static final Logger LOG = LoggerFactory.getLogger(BaseHdfsFileInputStream.class); + + private final Statistics mStatistics; + protected final FileInStream mInputStream; + + private boolean mClosed = false; + + /** + * Constructs a new stream for reading a file from HDFS. + * + * @param fs the file system + * @param uri the Alluxio file URI + * @param stats filesystem statistics + */ + public BaseHdfsFileInputStream(FileSystem fs, AlluxioURI uri, Statistics stats) + throws IOException { + LOG.debug("HdfsFileInputStream({}, {})", uri, stats); + + mStatistics = stats; + try { + mInputStream = fs.openFile(uri); + } catch (FileDoesNotExistException e) { + // Transform the Alluxio exception to a Java exception to satisfy the HDFS API contract. + throw new FileNotFoundException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(uri)); + } catch (AlluxioException e) { + throw new IOException(e); + } + } + + /** + * Constructs a new stream for reading a file from HDFS. + * + * @param inputStream the input stream + * @param stats filesystem statistics + */ + public BaseHdfsFileInputStream(FileInStream inputStream, Statistics stats) { + mInputStream = inputStream; + mStatistics = stats; + } + + @Override + public int available() throws IOException { + if (mClosed) { + throw new IOException("Cannot query available bytes from a closed stream."); + } + return (int) mInputStream.remaining(); + } + + @Override + public void close() throws IOException { + if (mClosed) { + return; + } + mInputStream.close(); + mClosed = true; + } + + @Override + public long getPos() throws IOException { + return mInputStream.getPos(); + } + + @Override + public int read() throws IOException { + if (mClosed) { + throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); + } + + int data = mInputStream.read(); + if (data != -1 && mStatistics != null) { + mStatistics.incrementBytesRead(1); + } + return data; + } + + @Override + public int read(byte[] buffer) throws IOException { + return read(buffer, 0, buffer.length); + } + + @Override + public int read(byte[] buffer, int offset, int length) throws IOException { + if (mClosed) { + throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); + } + + int bytesRead = mInputStream.read(buffer, offset, length); + if (bytesRead != -1 && mStatistics != null) { + mStatistics.incrementBytesRead(bytesRead); + } + return bytesRead; + } + + @Override + public int read(ByteBuffer buf) throws IOException { + if (mClosed) { + throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); + } + int bytesRead = mInputStream.read(buf); + if (bytesRead != -1 && mStatistics != null) { + mStatistics.incrementBytesRead(bytesRead); + } + return bytesRead; + } + + @Override + public int read(long position, byte[] buffer, int offset, int length) throws IOException { + if (mClosed) { + throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); + } + + int bytesRead = mInputStream.positionedRead(position, buffer, offset, length); + if (bytesRead != -1 && mStatistics != null) { + mStatistics.incrementBytesRead(bytesRead); + } + return bytesRead; + } + + @Override + public void readFully(long position, byte[] buffer) throws IOException { + readFully(position, buffer, 0, buffer.length); + } + + @Override + public void readFully(long position, byte[] buffer, int offset, int length) throws IOException { + int 
totalBytesRead = 0; + while (totalBytesRead < length) { + int bytesRead = + read(position + totalBytesRead, buffer, offset + totalBytesRead, length - totalBytesRead); + if (bytesRead == -1) { + throw new EOFException(); + } + totalBytesRead += bytesRead; + } + } + + @Override + public void seek(long pos) throws IOException { + try { + mInputStream.seek(pos); + } catch (IllegalArgumentException e) { // convert back to IOException + throw new IOException(e); + } + } + + /** + * This method is not supported in {@link BaseHdfsFileInputStream}. + * + * @param targetPos N/A + * @return N/A + * @throws IOException always + */ + @Override + public boolean seekToNewSource(long targetPos) throws IOException { + throw new IOException("This method is not supported."); + } + + @Override + public long skip(long n) throws IOException { + if (mClosed) { + throw new IOException("Cannot skip bytes in a closed stream."); + } + return mInputStream.skip(n); + } +} diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/HdfsFileInputStream.java b/core/client/hdfs/src/main/java/alluxio/hadoop/HdfsFileInputStream.java index 8ac3f7ab3285..a00901202458 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/HdfsFileInputStream.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/HdfsFileInputStream.java @@ -14,37 +14,15 @@ import alluxio.AlluxioURI; import alluxio.client.file.FileInStream; import alluxio.client.file.FileSystem; -import alluxio.exception.AlluxioException; -import alluxio.exception.ExceptionMessage; -import alluxio.exception.FileDoesNotExistException; -import org.apache.hadoop.fs.ByteBufferReadable; import org.apache.hadoop.fs.FileSystem.Statistics; -import org.apache.hadoop.fs.PositionedReadable; -import org.apache.hadoop.fs.Seekable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.io.EOFException; -import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; -import javax.annotation.concurrent.NotThreadSafe; /** - * An input stream for reading a file from HDFS. This is just a wrapper around - * {@link FileInStream} with additional statistics gathering in a {@link Statistics} object. + * HdfsFileInputStream implementation for Hadoop 1 and Hadoop 2. */ -@NotThreadSafe -public class HdfsFileInputStream extends InputStream implements Seekable, PositionedReadable, - ByteBufferReadable { - private static final Logger LOG = LoggerFactory.getLogger(HdfsFileInputStream.class); - - private final Statistics mStatistics; - private final FileInStream mInputStream; - - private boolean mClosed = false; +public class HdfsFileInputStream extends BaseHdfsFileInputStream { /** * Constructs a new stream for reading a file from HDFS. @@ -53,19 +31,8 @@ public class HdfsFileInputStream extends InputStream implements Seekable, Positi
- throw new FileNotFoundException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(uri)); - } catch (AlluxioException e) { - throw new IOException(e); - } + public HdfsFileInputStream(FileSystem fs, AlluxioURI uri, Statistics stats) throws IOException { + super(fs, uri, stats); } /** @@ -75,132 +42,6 @@ public HdfsFileInputStream(FileSystem fs, AlluxioURI uri, Statistics stats) * @param stats filesystem statistics */ public HdfsFileInputStream(FileInStream inputStream, Statistics stats) { - mInputStream = inputStream; - mStatistics = stats; - } - - @Override - public int available() throws IOException { - if (mClosed) { - throw new IOException("Cannot query available bytes from a closed stream."); - } - return (int) mInputStream.remaining(); - } - - @Override - public void close() throws IOException { - if (mClosed) { - return; - } - mInputStream.close(); - mClosed = true; - } - - @Override - public long getPos() throws IOException { - return mInputStream.getPos(); - } - - @Override - public int read() throws IOException { - if (mClosed) { - throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); - } - - int data = mInputStream.read(); - if (data != -1 && mStatistics != null) { - mStatistics.incrementBytesRead(1); - } - return data; - } - - @Override - public int read(byte[] buffer) throws IOException { - return read(buffer, 0, buffer.length); - } - - @Override - public int read(byte[] buffer, int offset, int length) throws IOException { - if (mClosed) { - throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); - } - - int bytesRead = mInputStream.read(buffer, offset, length); - if (bytesRead != -1 && mStatistics != null) { - mStatistics.incrementBytesRead(bytesRead); - } - return bytesRead; - } - - @Override - public int read(ByteBuffer buf) throws IOException { - if (mClosed) { - throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); - } - int bytesRead = mInputStream.read(buf); - if (bytesRead != -1 && mStatistics != null) { - mStatistics.incrementBytesRead(bytesRead); - } - return bytesRead; - } - - @Override - public int read(long position, byte[] buffer, int offset, int length) throws IOException { - if (mClosed) { - throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); - } - - int bytesRead = mInputStream.positionedRead(position, buffer, offset, length); - if (bytesRead != -1 && mStatistics != null) { - mStatistics.incrementBytesRead(bytesRead); - } - return bytesRead; - } - - @Override - public void readFully(long position, byte[] buffer) throws IOException { - readFully(position, buffer, 0, buffer.length); - } - - @Override - public void readFully(long position, byte[] buffer, int offset, int length) throws IOException { - int totalBytesRead = 0; - while (totalBytesRead < length) { - int bytesRead = - read(position + totalBytesRead, buffer, offset + totalBytesRead, length - totalBytesRead); - if (bytesRead == -1) { - throw new EOFException(); - } - totalBytesRead += bytesRead; - } - } - - @Override - public void seek(long pos) throws IOException { - try { - mInputStream.seek(pos); - } catch (IllegalArgumentException e) { // convert back to IOException - throw new IOException(e); - } - } - - /** - * This method is not supported in {@link HdfsFileInputStream}. 
- * - * @param targetPos N/A - * @return N/A - * @throws IOException always - */ - @Override - public boolean seekToNewSource(long targetPos) throws IOException { - throw new IOException("This method is not supported."); - } - - @Override - public long skip(long n) throws IOException { - if (mClosed) { - throw new IOException("Cannot skip bytes in a closed stream."); - } - return mInputStream.skip(n); + super(inputStream, stats); } } diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java b/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java index 68d841d6ce4d..5b0c74fd6fe7 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java @@ -15,18 +15,19 @@ import static java.nio.charset.StandardCharsets.UTF_8; import alluxio.AlluxioURI; -import alluxio.Constants; import alluxio.client.file.CacheContext; import alluxio.client.file.URIStatus; import alluxio.client.file.cache.CacheManager; import alluxio.client.file.cache.LocalCacheFileInStream; import alluxio.client.file.cache.filter.CacheFilter; import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; import alluxio.metrics.MetricsConfig; import alluxio.metrics.MetricsSystem; import alluxio.wire.FileInfo; import com.google.common.base.Preconditions; +import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -39,10 +40,8 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; -import java.util.HashSet; import java.util.Map; import java.util.Properties; -import java.util.Set; /** * An Alluxio client compatible with Apache Hadoop {@link org.apache.hadoop.fs.FileSystem} @@ -51,12 +50,6 @@ */ public class LocalCacheFileSystem extends org.apache.hadoop.fs.FileSystem { private static final Logger LOG = LoggerFactory.getLogger(LocalCacheFileSystem.class); - private static final Set SUPPORTED_FS = new HashSet() { - { - add(Constants.SCHEME); - add("ws"); - } - }; /** The external Hadoop filesystem to query on cache miss. */ private final org.apache.hadoop.fs.FileSystem mExternalFileSystem; @@ -88,10 +81,6 @@ public LocalCacheFileSystem(org.apache.hadoop.fs.FileSystem fileSystem, @Override public synchronized void initialize(URI uri, org.apache.hadoop.conf.Configuration conf) throws IOException { - if (!SUPPORTED_FS.contains(uri.getScheme())) { - throw new UnsupportedOperationException( - uri.getScheme() + " is not supported as the external filesystem."); - } super.initialize(uri, conf); mHadoopConf = conf; // Set statistics @@ -143,8 +132,15 @@ public FSDataInputStream open(Path path, int bufferSize) throws IOException { .setGroup(externalFileStatus.getGroup()); // FilePath is a unique identifier for a file, however it can be a long string // hence using md5 hash of the file path as the identifier in the cache. 
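// The hunk continues below by making this identifier optionally mtime-aware.
// A condensed sketch of the resulting selection (assuming the md5()/UTF_8
// static imports this file already uses, and a boolean includeMtime read from
// USER_CLIENT_CACHE_IDENTIFIER_INCLUDE_MTIME; names simplified for
// illustration):
//   String pathKey = externalFileStatus.getPath().toString();
//   // folding in the mtime gives an updated file a fresh identifier, so pages
//   // cached for the old contents are not served for the new contents
//   String cacheIdentifier = includeMtime
//       ? md5().hashString(pathKey + externalFileStatus.getModificationTime(), UTF_8).toString()
//       : md5().hashString(pathKey, UTF_8).toString();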
- CacheContext context = CacheContext.defaults().setCacheIdentifier( - md5().hashString(externalFileStatus.getPath().toString(), UTF_8).toString()); + String cacheIdentifier; + if (mAlluxioConf.getBoolean(PropertyKey.USER_CLIENT_CACHE_IDENTIFIER_INCLUDE_MTIME)) { + // include mtime to avoid consistency issues if the file may update + cacheIdentifier = md5().hashString(externalFileStatus.getPath().toString() + + externalFileStatus.getModificationTime(), UTF_8).toString(); + } else { + cacheIdentifier = md5().hashString(externalFileStatus.getPath().toString(), UTF_8).toString(); + } + CacheContext context = CacheContext.defaults().setCacheIdentifier(cacheIdentifier); URIStatus status = new URIStatus(info, context); return open(status, bufferSize); } @@ -218,4 +214,22 @@ public boolean mkdirs(Path f, FsPermission permission) throws IOException { public FileStatus getFileStatus(Path f) throws IOException { return mExternalFileSystem.getFileStatus(f); } + + @Override + public BlockLocation[] getFileBlockLocations(FileStatus file, long start, + long len) throws IOException { + // Applications use the block information here to schedule/distribute the tasks. + // Return the UFS locations directly instead of the local cache location, + // so the application can schedule the tasks accordingly + return mExternalFileSystem.getFileBlockLocations(file, start, len); + } + + @Override + public BlockLocation[] getFileBlockLocations(Path p, long start, long len) + throws IOException { + // Applications use the block information here to schedule/distribute the tasks. + // Return the UFS locations directly instead of the local cache location, + // so the application can schedule the tasks accordingly + return mExternalFileSystem.getFileBlockLocations(p, start, len); + } } diff --git a/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java b/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java index 7ccba889e7a0..78ae1e49229a 100644 --- a/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java +++ b/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java @@ -37,7 +37,9 @@ import alluxio.client.file.URIStatus; import alluxio.conf.InstancedConfiguration; import alluxio.conf.PropertyKey; +import alluxio.exception.ExceptionMessage; import alluxio.exception.FileAlreadyExistsException; +import alluxio.grpc.ListStatusPOptions; import alluxio.util.ConfigurationUtils; import alluxio.wire.BlockInfo; import alluxio.wire.FileBlockInfo; @@ -50,6 +52,8 @@ import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsCreateModes; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.security.UserGroupInformation; import org.junit.After; import org.junit.Before; @@ -405,9 +409,58 @@ public void listStatus() throws Exception { Path path = new Path("/dir"); alluxio.client.file.FileSystem alluxioFs = mock(alluxio.client.file.FileSystem.class); - when(alluxioFs.listStatus(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)))) + FileSystem alluxioHadoopFs = new FileSystem(alluxioFs); + URI uri = URI.create(Constants.HEADER + "host:1"); + alluxioHadoopFs.initialize(uri, getConf()); + ListStatusPOptions listStatusPOptions = ListStatusPOptions.getDefaultInstance().toBuilder() + .setExcludeMountInfo(alluxioHadoopFs.mAlluxioConf.getBoolean( + PropertyKey.USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS)).build(); + 
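// Note on the stubbing below: Mockito matches stubbed calls by argument
// equality, so the test has to build exactly the ListStatusPOptions that the
// initialized filesystem will send, i.e. with excludeMountInfo mirroring
// USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS from its configuration;
// an unmatched listStatus(...) call would otherwise return null from the mock.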
when(alluxioFs.listStatus(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)), + listStatusPOptions)) .thenReturn(Lists.newArrayList(new URIStatus(fileInfo1), new URIStatus(fileInfo2))); + + FileStatus[] fileStatuses = alluxioHadoopFs.listStatus(path); + assertFileInfoEqualsFileStatus(fileInfo1, fileStatuses[0]); + assertFileInfoEqualsFileStatus(fileInfo2, fileStatuses[1]); + alluxioHadoopFs.close(); + } + + /** + * Tests that the {@link AbstractFileSystem#listStatus(Path)} method uses + * {@link URIStatus#getLastModificationTimeMs()} correctly without mount info. + */ + @Test + public void listStatusWithoutMountInfo() throws Exception { + FileInfo fileInfo1 = new FileInfo() + .setLastModificationTimeMs(111L) + .setLastAccessTimeMs(123L) + .setFolder(false) + .setOwner("user1") + .setGroup("group1") + .setMode(00755); + FileInfo fileInfo2 = new FileInfo() + .setLastModificationTimeMs(222L) + .setLastAccessTimeMs(234L) + .setFolder(true) + .setOwner("user2") + .setGroup("group2") + .setMode(00644); + + Path path = new Path("/dir"); + alluxio.client.file.FileSystem alluxioFs = + mock(alluxio.client.file.FileSystem.class); FileSystem alluxioHadoopFs = new FileSystem(alluxioFs); + URI uri = URI.create(Constants.HEADER + "host:1"); + Configuration configuration = getConf(); + configuration.setBoolean( + PropertyKey.USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS.getName(), + true); + alluxioHadoopFs.initialize(uri, configuration); + ListStatusPOptions listStatusPOptions = ListStatusPOptions.getDefaultInstance().toBuilder() + .setExcludeMountInfo(true).build(); + when(alluxioFs.listStatus(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)), + listStatusPOptions)) + .thenReturn(Lists.newArrayList(new URIStatus(fileInfo1), new URIStatus(fileInfo2))); FileStatus[] fileStatuses = alluxioHadoopFs.listStatus(path); assertFileInfoEqualsFileStatus(fileInfo1, fileStatuses[0]); @@ -425,9 +478,15 @@ public void throwFileNotFoundExceptionWhenListStatusNonExistingTest() throws Exc try { Path path = new Path("/ALLUXIO-2036"); alluxio.client.file.FileSystem alluxioFs = mock(alluxio.client.file.FileSystem.class); - when(alluxioFs.listStatus(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)))) - .thenThrow(new FileNotFoundException("ALLUXIO-2036 not Found")); alluxioHadoopFs = new FileSystem(alluxioFs); + URI uri = URI.create(Constants.HEADER + "host:1"); + alluxioHadoopFs.initialize(uri, getConf()); + ListStatusPOptions listStatusPOptions = ListStatusPOptions.getDefaultInstance().toBuilder() + .setExcludeMountInfo(alluxioHadoopFs.mAlluxioConf.getBoolean( + PropertyKey.USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS)).build(); + when(alluxioFs.listStatus(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)), + listStatusPOptions)) + .thenThrow(new FileNotFoundException("ALLUXIO-2036 not Found")); FileStatus[] fileStatuses = alluxioHadoopFs.listStatus(path); // if we reach here, FileNotFoundException is not thrown hence Fail the test case assertTrue(false); @@ -665,13 +724,17 @@ public void createWithoutOverwrite() throws Exception { when(alluxioFs.exists(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)))) .thenReturn(true); when(alluxioFs.createFile(eq(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path))), any())) - .thenThrow(new FileAlreadyExistsException(path.toString())); + .thenThrow(new FileAlreadyExistsException( + ExceptionMessage.CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE.getMessage(path.toString()))); try (FileSystem alluxioHadoopFs = new FileSystem(alluxioFs)) { - 
alluxioHadoopFs.create(path, false, 100, (short) 1, 1000); + alluxioHadoopFs.create(path, + FsCreateModes.applyUMask(FsPermission.getFileDefault(), FsPermission.getUMask(getConf())), + false, 100, (short) 1, 1000, null); fail("create() of existing file is expected to fail"); } catch (IOException e) { - assertEquals("Not allowed to create() (overwrite=false) for existing Alluxio path: " + path, + assertEquals("alluxio.exception.FileAlreadyExistsException: " + + ExceptionMessage.CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE.getMessage(path), e.getMessage()); } } diff --git a/core/client/hdfs3/pom.xml b/core/client/hdfs3/pom.xml new file mode 100644 index 000000000000..3bc1b3b8cb14 --- /dev/null +++ b/core/client/hdfs3/pom.xml @@ -0,0 +1,57 @@ +<!-- + The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + (the "License"). You may not use this work except in compliance with the License, which is + available at www.apache.org/licenses/LICENSE-2.0 + + This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + either express or implied, as more fully set forth in the License. + + See the NOTICE file distributed with this work for information regarding copyright ownership. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.alluxio</groupId> + <artifactId>alluxio-core-client</artifactId> + <version>2.10.0-SNAPSHOT</version> + </parent> + <artifactId>alluxio-core-client-hdfs3</artifactId> + <packaging>jar</packaging> + <name>Alluxio Core - Client - HDFS3</name> + <description>HDFS Client of Alluxio Core For HDFS 3</description> + + <properties> + <build.path>${project.parent.parent.parent.basedir}/build</build.path> + <failIfNoTests>false</failIfNoTests> + </properties> + + <dependencies> + <dependency> + <groupId>org.alluxio</groupId> + <artifactId>alluxio-core-client-hdfs</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jar-plugin</artifactId> + <executions> + <execution> + <goals> + <goal>test-jar</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> diff --git a/core/client/hdfs3/src/main/java/alluxio/hadoop/HdfsFileInputStream.java b/core/client/hdfs3/src/main/java/alluxio/hadoop/HdfsFileInputStream.java new file mode 100644 index 000000000000..9c1f6511b2db --- /dev/null +++ b/core/client/hdfs3/src/main/java/alluxio/hadoop/HdfsFileInputStream.java @@ -0,0 +1,66 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.hadoop; + +import alluxio.AlluxioURI; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileSystem; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.CanUnbuffer; +import org.apache.hadoop.fs.FileSystem.Statistics; +import org.apache.hadoop.fs.StreamCapabilities; + +import java.io.IOException; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * HdfsFileInputStream implementation for Hadoop 3. + * This is just a wrapper around {@link BaseHdfsFileInputStream} with + * CanUnbuffer and StreamCapabilities support. + */ +@NotThreadSafe +public class HdfsFileInputStream extends BaseHdfsFileInputStream + implements CanUnbuffer, StreamCapabilities { + /** + * Constructs a new stream for reading a file from HDFS. + * + * @param fs the file system + * @param uri the Alluxio file URI + * @param stats filesystem statistics + */ + public HdfsFileInputStream(FileSystem fs, AlluxioURI uri, Statistics stats) + throws IOException { + super(fs, uri, stats); + } + + /** + * Constructs a new stream for reading a file from HDFS.
+ * + * @param inputStream the input stream + * @param stats filesystem statistics + */ + public HdfsFileInputStream(FileInStream inputStream, Statistics stats) { + super(inputStream, stats); + } + + @Override + public boolean hasCapability(String capability) { + return StringUtils.equalsIgnoreCase("in:unbuffer", capability) + || StringUtils.equalsIgnoreCase("in:readbytebuffer", capability); + } + + @Override + public void unbuffer() { + mInputStream.unbuffer(); + } +} diff --git a/core/client/pom.xml b/core/client/pom.xml index 338db707b66b..46895c9ea6cf 100644 --- a/core/client/pom.xml +++ b/core/client/pom.xml @@ -25,6 +25,7 @@ fs hdfs + hdfs3 diff --git a/core/common/pom.xml b/core/common/pom.xml index b8adf2b56652..32539cde59d8 100644 --- a/core/common/pom.xml +++ b/core/common/pom.xml @@ -87,6 +87,10 @@ io.grpc grpc-netty + + io.grpc + grpc-services + io.grpc grpc-stub @@ -99,6 +103,10 @@ org.apache.commons commons-lang3 + + org.apache.commons + commons-compress + org.apache.curator curator-client @@ -123,7 +131,7 @@ io.netty netty-tcnative-boringssl-static - 2.0.26.Final + 2.0.56.Final @@ -151,6 +159,12 @@ test-jar test + + org.junit.jupiter + junit-jupiter-api + ${jupiter.version} + test + diff --git a/core/common/src/main/java/alluxio/AbstractClient.java b/core/common/src/main/java/alluxio/AbstractClient.java index 936318346402..efe5f0fcde6f 100644 --- a/core/common/src/main/java/alluxio/AbstractClient.java +++ b/core/common/src/main/java/alluxio/AbstractClient.java @@ -139,7 +139,10 @@ protected long getRemoteServiceVersion() throws AlluxioStatusException { try { return mVersionService .getServiceVersion( - GetServiceVersionPRequest.newBuilder().setServiceType(getRemoteServiceType()).build()) + GetServiceVersionPRequest.newBuilder() + .setServiceType(getRemoteServiceType()) + .setAllowedOnStandbyMasters(true) + .build()) .getVersion(); } catch (Throwable t) { throw AlluxioStatusException.fromThrowable(t); @@ -484,6 +487,7 @@ private synchronized V retryRPCInternal(RetryPolicy retryPolicy, RpcCallable if (se.getStatusCode() == Status.Code.UNAVAILABLE || se.getStatusCode() == Status.Code.CANCELLED || se.getStatusCode() == Status.Code.UNAUTHENTICATED + || se.getStatusCode() == Status.Code.UNIMPLEMENTED // for standby grpc enabled || e.getCause() instanceof UnresolvedAddressException) { ex = se; } else { diff --git a/core/common/src/main/java/alluxio/AbstractMasterClient.java b/core/common/src/main/java/alluxio/AbstractMasterClient.java index 9393980f57c0..555a0edea14d 100644 --- a/core/common/src/main/java/alluxio/AbstractMasterClient.java +++ b/core/common/src/main/java/alluxio/AbstractMasterClient.java @@ -68,6 +68,21 @@ public AbstractMasterClient( mMasterSelectionPolicy = MasterSelectionPolicy.Factory.primaryMaster(); } + /** + * Creates a new master client without a specific address. 
+ * @param clientConf master client configuration + * @param selectionPolicy master selection policy: which master the client should connect to + * @param retryPolicySupplier retry policy to use + */ + public AbstractMasterClient( + MasterClientContext clientConf, + MasterSelectionPolicy selectionPolicy, + Supplier retryPolicySupplier) { + super(clientConf, retryPolicySupplier); + mMasterInquireClient = clientConf.getMasterInquireClient(); + mMasterSelectionPolicy = selectionPolicy; + } + @Override public synchronized InetSocketAddress getConfAddress() throws UnavailableException { return mMasterSelectionPolicy.getPrimaryMasterAddressCached(mMasterInquireClient); diff --git a/core/common/src/main/java/alluxio/AlluxioURI.java b/core/common/src/main/java/alluxio/AlluxioURI.java index 055222caa8f4..11e8e8c5ffb5 100644 --- a/core/common/src/main/java/alluxio/AlluxioURI.java +++ b/core/common/src/main/java/alluxio/AlluxioURI.java @@ -449,6 +449,18 @@ public static String normalizePath(String path) { * @return true the current alluxioURI is an ancestor of the AlluxioURI */ public boolean isAncestorOf(AlluxioURI alluxioURI) throws InvalidPathException { + return isAncestorOf(alluxioURI, true); + } + + /** + * Returns true if the current AlluxioURI is an ancestor of another AlluxioURI. + * otherwise, return false. + * @param alluxioURI potential children to check + * @param cleanPath if the paths should be cleaned + * @return true the current alluxioURI is an ancestor of the AlluxioURI + */ + public boolean isAncestorOf(AlluxioURI alluxioURI, boolean cleanPath) + throws InvalidPathException { // To be an ancestor of another URI, authority and scheme must match if (!Objects.equals(getAuthority(), alluxioURI.getAuthority())) { return false; @@ -458,7 +470,7 @@ public boolean isAncestorOf(AlluxioURI alluxioURI) throws InvalidPathException { } return PathUtils.hasPrefix(PathUtils.normalizePath(alluxioURI.getPath(), SEPARATOR), - PathUtils.normalizePath(getPath(), SEPARATOR)); + PathUtils.normalizePath(getPath(), SEPARATOR), cleanPath); } /** diff --git a/core/common/src/main/java/alluxio/Constants.java b/core/common/src/main/java/alluxio/Constants.java index 588675d1d662..129578ea2bdc 100644 --- a/core/common/src/main/java/alluxio/Constants.java +++ b/core/common/src/main/java/alluxio/Constants.java @@ -11,6 +11,7 @@ package alluxio; +import java.util.regex.Pattern; import javax.annotation.concurrent.ThreadSafe; /** @@ -95,6 +96,8 @@ public final class Constants { public static final long META_MASTER_CONFIG_SERVICE_VERSION = 2; public static final long META_MASTER_CLIENT_SERVICE_VERSION = 2; public static final long META_MASTER_MASTER_SERVICE_VERSION = 1; + public static final long META_MASTER_PROXY_SERVICE_VERSION = 1; + public static final long JOB_MASTER_MASTER_SERVICE_VERSION = 1; public static final long METRICS_MASTER_CLIENT_SERVICE_VERSION = 2; public static final long JOURNAL_MASTER_CLIENT_SERVICE_VERSION = 1; public static final long RAFT_JOURNAL_SERVICE_VERSION = 1; @@ -117,7 +120,9 @@ public final class Constants { // Its value is "MetaMaster" for backwards compatibility so 1.7 clients can talk to 1.8 MetaMaster public static final String META_MASTER_CONFIG_SERVICE_NAME = "MetaMaster"; public static final String META_MASTER_CLIENT_SERVICE_NAME = "MetaMaster"; + public static final String META_MASTER_PROXY_SERVICE_NAME = "MetaMasterProxy"; public static final String META_MASTER_MASTER_SERVICE_NAME = "MetaMasterMaster"; + public static final String JOB_MASTER_MASTER_SERVICE_NAME = 
"JobMasterMaster"; public static final String METRICS_MASTER_CLIENT_SERVICE_NAME = "MetricsMasterClient"; public static final String BLOCK_WORKER_CLIENT_SERVICE_NAME = "BlockWorkerClient"; public static final String FILE_SYSTEM_WORKER_CLIENT_SERVICE_NAME = "FileSystemWorkerClient"; @@ -228,5 +233,9 @@ public final class Constants { public static final String MEDIUM_HDD = "HDD"; public static final String MEDIUM_SSD = "SSD"; + // Log file pattern + public static final Pattern LOG_FILE_PATTERN = + Pattern.compile(".*(\\.log|\\.out)(\\.[0-9-]+)?$|.*.txt|.*.json"); + private Constants() {} // prevent instantiation } diff --git a/core/common/src/main/java/alluxio/RuntimeConstants.java b/core/common/src/main/java/alluxio/RuntimeConstants.java index 19e1fccf2ae3..a59d9d0aad63 100644 --- a/core/common/src/main/java/alluxio/RuntimeConstants.java +++ b/core/common/src/main/java/alluxio/RuntimeConstants.java @@ -11,6 +11,8 @@ package alluxio; +import alluxio.grpc.BuildVersion; + import javax.annotation.concurrent.ThreadSafe; /** @@ -36,6 +38,16 @@ public final class RuntimeConstants { } } + public static final String REVISION_SHORT = ProjectConstants.REVISION.length() > 8 + ? ProjectConstants.REVISION.substring(0, 8) : ProjectConstants.REVISION; + public static final String VERSION_AND_REVISION_SHORT = + VERSION + "-" + REVISION_SHORT; + public static final BuildVersion UNKNOWN_VERSION_INFO = BuildVersion.newBuilder() + .setVersion("UNKNOWN").setRevision("UNKNOWN").build(); + public static final BuildVersion CURRENT_VERSION_INFO = BuildVersion.newBuilder() + .setVersion(RuntimeConstants.VERSION) + .setRevision(RuntimeConstants.REVISION_SHORT).build(); + /** The relative path to the Alluxio target jar. */ public static final String ALLUXIO_JAR = "target/alluxio-" + VERSION + "-jar-with-dependencies.jar"; diff --git a/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java b/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java index 5037d36c3837..78ff4f662fb2 100755 --- a/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java +++ b/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java @@ -16,27 +16,34 @@ import alluxio.collections.Pair; import alluxio.concurrent.jsr.ForkJoinPool; import alluxio.conf.AlluxioConfiguration; +import alluxio.file.options.DescendantType; import alluxio.security.authorization.AccessControlList; import alluxio.security.authorization.AclEntry; import alluxio.security.authorization.DefaultAccessControlList; import alluxio.underfs.Fingerprint; import alluxio.underfs.UfsDirectoryStatus; import alluxio.underfs.UfsFileStatus; +import alluxio.underfs.UfsLoadResult; import alluxio.underfs.UfsMode; import alluxio.underfs.UfsStatus; import alluxio.underfs.UnderFileSystem; import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; +import alluxio.util.RateLimiter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.function.Consumer; +import javax.annotation.Nullable; /** * Forwarder for {@link UnderFileSystem} objects that works through with ForkJoinPool's @@ -259,7 +266,7 @@ public List execute() throws 
IOException { } @Override - public UfsFileStatus getFileStatus(String path) throws IOException { + public UfsFileStatus getFileStatus(String path, GetFileStatusOptions options) throws IOException { return new ManagedBlockingUfsMethod() { @Override public UfsFileStatus execute() throws IOException { @@ -288,6 +295,11 @@ public Fingerprint getParsedFingerprint(String path) { return mUfs.getParsedFingerprint(path); } + @Override + public Fingerprint getParsedFingerprint(String path, @Nullable String contentHash) { + return mUfs.getParsedFingerprint(path, contentHash); + } + @Override public UfsMode getOperationMode(Map physicalUfsState) { return mUfs.getOperationMode(physicalUfsState); @@ -571,6 +583,32 @@ public void close() throws IOException { mUfs.close(); } + @Override + public Iterator listStatusIterable( + String path, ListOptions options, String startAfter, int batchSize) throws IOException { + return new ManagedBlockingUfsMethod>() { + @Override + public Iterator execute() throws IOException { + return mUfs.listStatusIterable(path, options, startAfter, batchSize); + } + }.get(); + } + + @Override + public void performListingAsync( + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, boolean checkStatus, Consumer onComplete, + Consumer onError) { + // given this is an async function, we do not execute it in the thread pool + mUfs.performListingAsync(path, continuationToken, startAfter, descendantType, + checkStatus, onComplete, onError); + } + + @Override + public RateLimiter getRateLimiter() { + return mUfs.getRateLimiter(); + } + /** * Utility class used to isolate calls into underlying UFS from concurrency compensation logic. * Note: This class used to make calls with a return value. 
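For context on the forwarder changes above: every synchronous UFS call in this class runs through a small ManagedBlockingUfsMethod wrapper so that the ForkJoinPool can compensate for blocked workers, which is also why the new performListingAsync is deliberately not routed through the pool. A minimal sketch of that wrapper pattern, using only java.util.concurrent (illustrative names, not Alluxio's exact internals):

import java.util.concurrent.ForkJoinPool;

// Runs a blocking call from a ForkJoinPool worker while allowing the pool to
// spawn a compensation thread, mirroring the anonymous-class call sites above.
abstract class ManagedBlockingCall<T> implements ForkJoinPool.ManagedBlocker {
  private T mResult;
  private boolean mDone = false;

  /** The blocking operation, e.g. a UFS metadata call. */
  protected abstract T compute() throws Exception;

  @Override
  public boolean block() throws InterruptedException {
    if (!mDone) {
      try {
        mResult = compute();
      } catch (Exception e) {
        throw new RuntimeException(e);
      } finally {
        mDone = true;
      }
    }
    return true;
  }

  @Override
  public boolean isReleasable() {
    return mDone;
  }

  public T get() throws InterruptedException {
    // tells the pool this thread is about to block, so it may add a spare worker
    ForkJoinPool.managedBlock(this);
    return mResult;
  }
}

A call such as getFileStatus then takes the shape new ManagedBlockingCall<UfsFileStatus>() { ... }.get(), matching the anonymous-class style visible in the hunks above.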
diff --git a/core/common/src/main/java/alluxio/concurrent/jsr/ForkJoinTask.java b/core/common/src/main/java/alluxio/concurrent/jsr/ForkJoinTask.java index f8977e7bcc3d..ee1be4b4a78d 100644 --- a/core/common/src/main/java/alluxio/concurrent/jsr/ForkJoinTask.java +++ b/core/common/src/main/java/alluxio/concurrent/jsr/ForkJoinTask.java @@ -368,7 +368,7 @@ else if (t.doJoin() < NORMAL) */ public static > Collection invokeAll(Collection tasks) { if (!(tasks instanceof RandomAccess) || !(tasks instanceof List)) { - invokeAll(tasks.toArray(new ForkJoinTask[tasks.size()])); + invokeAll(tasks.toArray(new ForkJoinTask[0])); return tasks; } List> ts = (List>) tasks; diff --git a/core/common/src/main/java/alluxio/conf/AlluxioProperties.java b/core/common/src/main/java/alluxio/conf/AlluxioProperties.java index 389e25d22c99..41c22edb75f5 100644 --- a/core/common/src/main/java/alluxio/conf/AlluxioProperties.java +++ b/core/common/src/main/java/alluxio/conf/AlluxioProperties.java @@ -273,4 +273,11 @@ public Source getSource(PropertyKey key) { public String hash() { return mHash.get(); } + + /** + * @return the last update time of the properties + */ + public long getLastUpdateTime() { + return mHash.getLastUpdateTime(); + } } diff --git a/core/common/src/main/java/alluxio/conf/Configuration.java b/core/common/src/main/java/alluxio/conf/Configuration.java index 09d250f2a222..76a7ff98bac0 100644 --- a/core/common/src/main/java/alluxio/conf/Configuration.java +++ b/core/common/src/main/java/alluxio/conf/Configuration.java @@ -481,6 +481,7 @@ public static GetConfigurationPResponse loadConfiguration(InetSocketAddress addr public static InstancedConfiguration getClusterConf(GetConfigurationPResponse response, AlluxioConfiguration conf, Scope scope) { String clientVersion = conf.getString(PropertyKey.VERSION); + String clientUfsVersion = conf.getString(PropertyKey.UNDERFS_VERSION); LOG.debug("Alluxio {} (version {}) is trying to load cluster level configurations", scope, clientVersion); List clusterConfig = response.getClusterConfigsList(); @@ -493,6 +494,11 @@ public static InstancedConfiguration getClusterConf(GetConfigurationPResponse re scope, clientVersion, clusterVersion); clusterProps.remove(PropertyKey.VERSION); } + // Check ufs version. Avoid adding it to user properties if the two versions are the same. 
+ String clusterUfsVersion = clusterProps.get(PropertyKey.UNDERFS_VERSION).toString(); + if (clientUfsVersion.equals(clusterUfsVersion)) { + clusterProps.remove(PropertyKey.UNDERFS_VERSION); + } // Merge conf returned by master as the cluster default into conf object AlluxioProperties props = conf.copyProperties(); props.merge(clusterProps, Source.CLUSTER_DEFAULT); @@ -641,4 +647,11 @@ private static Optional loadProperties(InputStream stream) { } return Optional.of(properties); } + + /** + * @return the last update time + */ + public static long getLastUpdateTime() { + return SERVER_CONFIG_REFERENCE.get().getLastUpdateTime(); + } } diff --git a/core/common/src/test/java/alluxio/conf/ConfigurationBuilder.java b/core/common/src/main/java/alluxio/conf/ConfigurationBuilder.java similarity index 95% rename from core/common/src/test/java/alluxio/conf/ConfigurationBuilder.java rename to core/common/src/main/java/alluxio/conf/ConfigurationBuilder.java index f306139fbd4e..87bf6cd90f55 100644 --- a/core/common/src/test/java/alluxio/conf/ConfigurationBuilder.java +++ b/core/common/src/main/java/alluxio/conf/ConfigurationBuilder.java @@ -24,7 +24,7 @@ public class ConfigurationBuilder { * @return the updated configuration builder */ public ConfigurationBuilder setProperty(PropertyKey key, Object value) { - mProperties.put(key, value.toString(), Source.RUNTIME); + mProperties.put(key, value, Source.RUNTIME); return this; } diff --git a/core/common/src/main/java/alluxio/conf/Hash.java b/core/common/src/main/java/alluxio/conf/Hash.java index ddfe6f04a91c..498a0782c318 100644 --- a/core/common/src/main/java/alluxio/conf/Hash.java +++ b/core/common/src/main/java/alluxio/conf/Hash.java @@ -29,6 +29,7 @@ public final class Hash { private final Supplier> mProperties; private final AtomicBoolean mShouldUpdate; private volatile String mVersion; + private volatile long mLastUpdateTime; /** * @param properties a stream of encoded properties @@ -70,10 +71,18 @@ public String get() { // If another thread has recomputed the version, no need to recompute again. if (mShouldUpdate.get()) { mVersion = compute(); + mLastUpdateTime = System.currentTimeMillis(); mShouldUpdate.set(false); } } } return mVersion; } + + /** + * @return the latest update time + */ + public synchronized long getLastUpdateTime() { + return mLastUpdateTime; + } } diff --git a/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java b/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java index 3bd384c01644..a56075ec7a31 100644 --- a/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java +++ b/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java @@ -377,7 +377,7 @@ public void validate() { + "If no JVM property is present, Alluxio will use default value '%s'.", key.getName(), key.getDefaultValue()); - if (PropertyKey.isDeprecated(key) && getSource(key).compareTo(Source.DEFAULT) != 0) { + if (PropertyKey.isDeprecated(key) && isSetByUser(key)) { LOG.warn("{} is deprecated. Please avoid using this key in the future. 
{}", key.getName(), PropertyKey.getDeprecationMessage(key)); } @@ -390,6 +390,7 @@ public void validate() { checkTieredLocality(); checkTieredStorage(); checkMasterThrottleThresholds(); + checkCheckpointZipConfig(); } @Override @@ -597,6 +598,19 @@ void checkTieredStorage() { } } + /** + * @throws IllegalStateException if invalid checkpoint zip configuration parameters are found + */ + private void checkCheckpointZipConfig() { + int compression = getInt( + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL); + if (compression < -1 || compression > 9) { + throw new IllegalStateException(String.format("Zip compression level for property key %s" + + " must be between -1 and 9 inclusive", + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL.getName())); + } + } + /** * @throws IllegalStateException if invalid throttle threshold parameters are found */ @@ -680,6 +694,13 @@ && getInt(PropertyKey.MASTER_THROTTLE_STRESSED_RPC_QUEUE_SIZE) } } + /** + * @return the last update time + */ + public long getLastUpdateTime() { + return mProperties.getLastUpdateTime(); + } + private class UnresolvablePropertyException extends Exception { public UnresolvablePropertyException(String msg) { diff --git a/core/common/src/main/java/alluxio/conf/OverlayConfiguration.java b/core/common/src/main/java/alluxio/conf/OverlayConfiguration.java new file mode 100644 index 000000000000..4b3cd1df16a1 --- /dev/null +++ b/core/common/src/main/java/alluxio/conf/OverlayConfiguration.java @@ -0,0 +1,188 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.conf; + +import com.google.common.collect.ImmutableMap; + +import java.time.Duration; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import javax.annotation.concurrent.ThreadSafe; + +/** + * Configuration that wraps around another configuration + * Priority for the value of a property follows: + * if a property has been set by user on the outer configuration, it takes priority + * if it is not set explicitly on the outer configuration, the inner configuration + * determines the value. + */ +@ThreadSafe +public class OverlayConfiguration implements AlluxioConfiguration { + /** + * Runtime level configuration. + */ + private final AlluxioConfiguration mOuterConf; + /** + * Default configuration. + */ + private final AlluxioConfiguration mInnerConf; + private final Set mKeySet; + private final Set mUserKeySet; + + /** + * Constructs a new instance with the specified references without copying the underlying + * properties. 
+ * + * @param outerConf the runtime level configuration to override + * @param innerConf the default configuration + */ + public OverlayConfiguration(AlluxioConfiguration outerConf, + AlluxioConfiguration innerConf) { + mOuterConf = outerConf; + mInnerConf = innerConf; + mUserKeySet = new HashSet<>(); + mUserKeySet.addAll(outerConf.userKeySet()); + mUserKeySet.addAll(innerConf.userKeySet()); + mKeySet = new HashSet<>(); + mKeySet.addAll(innerConf.keySet()); + mKeySet.addAll(outerConf.keySet()); + } + + private AlluxioConfiguration conf(PropertyKey key) { + return mOuterConf.isSetByUser(key) ? mOuterConf : mInnerConf; + } + + @Override + public Object get(PropertyKey key) { + return conf(key).get(key); + } + + @Override + public Object get(PropertyKey key, ConfigurationValueOptions options) { + return conf(key).get(key, options); + } + + @Override + public boolean isSet(PropertyKey key) { + return conf(key).isSet(key); + } + + @Override + public boolean isSetByUser(PropertyKey key) { + return conf(key).isSetByUser(key); + } + + @Override + public Set keySet() { + return mKeySet; + } + + @Override + public Set userKeySet() { + return mUserKeySet; + } + + @Override + public String getString(PropertyKey key) { + return conf(key).getString(key); + } + + @Override + public int getInt(PropertyKey key) { + return conf(key).getInt(key); + } + + @Override + public long getLong(PropertyKey key) { + return conf(key).getLong(key); + } + + @Override + public double getDouble(PropertyKey key) { + return conf(key).getDouble(key); + } + + @Override + public boolean getBoolean(PropertyKey key) { + return conf(key).getBoolean(key); + } + + @Override + public List getList(PropertyKey key) { + return conf(key).getList(key); + } + + @Override + public > T getEnum(PropertyKey key, Class enumType) { + return conf(key).getEnum(key, enumType); + } + + @Override + public long getBytes(PropertyKey key) { + return conf(key).getBytes(key); + } + + @Override + public long getMs(PropertyKey key) { + return conf(key).getMs(key); + } + + @Override + public Duration getDuration(PropertyKey key) { + return conf(key).getDuration(key); + } + + @Override + public Class getClass(PropertyKey key) { + return conf(key).getClass(key); + } + + @Override + public Map getNestedProperties(PropertyKey prefixKey) { + return conf(prefixKey).getNestedProperties(prefixKey); + } + + @Override + public AlluxioProperties copyProperties() { + AlluxioProperties properties = mInnerConf.copyProperties(); + for (PropertyKey key : mOuterConf.userKeySet()) { + properties.put(key, mOuterConf.get(key), Source.RUNTIME); + } + return properties; + } + + @Override + public Source getSource(PropertyKey key) { + return conf(key).getSource(key); + } + + @Override + public Map toMap(ConfigurationValueOptions opts) { + ImmutableMap.Builder map = ImmutableMap.builder(); + // Cannot use Collectors.toMap because we support null keys. 
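All of the OverlayConfiguration accessors above funnel through the private conf(key) helper, so the resolution rule is: use the outer configuration only if the user explicitly set the key there, otherwise fall back to the inner one. A toy model of that dispatch with simplified stand-in types (not the real AlluxioConfiguration interface):

import java.util.HashMap;
import java.util.Map;

public class OverlayDemo {
  // Minimal stand-in for a configuration: a value map plus a user-set flag.
  static class Conf {
    final Map<String, String> values = new HashMap<>();
    final Map<String, Boolean> setByUser = new HashMap<>();

    void put(String key, String value, boolean byUser) {
      values.put(key, value);
      setByUser.put(key, byUser);
    }

    boolean isSetByUser(String key) {
      return setByUser.getOrDefault(key, false);
    }

    String get(String key) {
      return values.get(key);
    }
  }

  public static void main(String[] args) {
    Conf inner = new Conf(); // default / cluster configuration
    Conf outer = new Conf(); // runtime configuration
    inner.put("alluxio.web.threads", "4", false);
    outer.put("alluxio.web.threads", "16", true);  // user-set: wins
    inner.put("alluxio.home", "/opt/alluxio", false);
    outer.put("alluxio.home", "/ignored", false);  // not user-set: ignored

    for (String key : new String[] {"alluxio.web.threads", "alluxio.home"}) {
      // The overlay rule: outer only when explicitly set by the user.
      Conf chosen = outer.isSetByUser(key) ? outer : inner;
      System.out.println(key + " -> " + chosen.get(key));
    }
    // prints 16 for web.threads (outer, user-set), /opt/alluxio for home.
  }
}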
keySet().forEach(key -> + map.put(key.getName(), conf(key).getOrDefault(key, null, opts))); + return map.build(); + } + + @Override + public void validate() { + new InstancedConfiguration(copyProperties()).validate(); + } + + @Override + public boolean clusterDefaultsLoaded() { + return mInnerConf.clusterDefaultsLoaded(); + } +} diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 90ca0fb97fb2..c0f64cee8913 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -52,6 +52,7 @@ import alluxio.security.authentication.AuthType; import alluxio.util.FormatUtils; import alluxio.util.OSUtils; +import alluxio.util.compression.DirectoryMarshaller; import alluxio.util.io.PathUtils; import alluxio.worker.block.BlockStoreType; import alluxio.worker.block.management.BackoffStrategy; @@ -67,6 +68,7 @@ import com.google.common.collect.ImmutableList; import com.sun.management.OperatingSystemMXBean; import io.netty.util.ResourceLeakDetector; +import org.rocksdb.CompressionType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -410,7 +412,8 @@ public Builder setDefaultSupplier(Supplier supplier, String description) * @return the updated builder instance */ public Builder setDefaultValue(Object defaultValue) { - checkArgument(validateValue(defaultValue, mType, mEnumType, mValueValidationFunction)); + checkArgument(validateValue(defaultValue, mType, mEnumType, mValueValidationFunction), + String.format("default value %s of %s failed validation", defaultValue, mName)); mDefaultValue = formatValue(defaultValue, mType, mEnumType, mDelimiter); return this; } @@ -586,6 +589,25 @@ public String toString() { .setScope(Scope.SERVER) .setIsHidden(true) .build(); + public static final PropertyKey EXIT_COLLECT_INFO = + booleanBuilder(Name.EXIT_COLLECT_INFO) + .setDefaultValue(true) + .setDescription("If true, the process will dump metrics and jstack into the log folder. " + + "This only applies to Alluxio master and worker processes.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey GRPC_REFLECTION_ENABLED = + booleanBuilder(Name.GRPC_REFLECTION_ENABLED) + .setDefaultValue(false) + .setDescription("If true, grpc reflection will be enabled on alluxio grpc servers, " + + "including masters, workers, job masters and job workers. " + + "This allows grpc tools such as grpcurl or grpcui to send grpc requests to " + + "the master server without knowing the protobufs. " + + "This is a debug option.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.ALL) + .build(); public static final PropertyKey HOME = stringBuilder(Name.HOME) .setDefaultValue("/opt/alluxio") @@ -836,6 +858,50 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.SERVER) .build(); + public static final PropertyKey WEB_CORS_ALLOW_CREDENTIAL = + booleanBuilder(Name.WEB_CORS_ALLOW_CREDENTIAL) + .setDefaultValue(false) + .setDescription("Whether to allow requests to include credentials.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey WEB_CORS_ALLOW_HEADERS = + stringBuilder(Name.WEB_CORS_ALLOW_HEADERS) + .setDefaultValue("*") + .setDescription("Which headers are allowed for CORS. 
Use * to allow any header.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey WEB_CORS_ALLOW_METHODS = + stringBuilder(Name.WEB_CORS_ALLOW_METHODS) + .setDefaultValue("*") + .setDescription("Which methods are allowed for CORS. Use * to allow any method.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey WEB_CORS_ALLOW_ORIGINS = + stringBuilder(Name.WEB_CORS_ALLOW_ORIGINS) + .setDefaultValue("*") + .setDescription("Which origins are allowed for CORS. Use * to allow any origin.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey WEB_CORS_EXPOSED_HEADERS = + stringBuilder(Name.WEB_CORS_EXPOSED_HEADERS) + .setDefaultValue("*") + .setDescription("Which headers may be set in the response when accessing a " + + "cross-origin resource. Use * to allow any header.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey WEB_CORS_MAX_AGE = + intBuilder(Name.WEB_CORS_MAX_AGE) + .setDefaultValue(-1) + .setDescription("Maximum number of seconds the results can be cached. " + + "-1 means no cache.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey WEB_REFRESH_INTERVAL = durationBuilder(Name.WEB_REFRESH_INTERVAL) .setDefaultValue("15s") @@ -1168,6 +1234,12 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.SERVER) .build(); + public static final PropertyKey UNDERFS_OBJECT_STORE_STREAMING_UPLOAD_PART_TIMEOUT = + durationBuilder(Name.UNDERFS_OBJECT_STORE_STREAMING_UPLOAD_PART_TIMEOUT) + .setDescription("Timeout for uploading a part when using streaming uploads.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey UNDERFS_OBJECT_STORE_BREADCRUMBS_ENABLED = booleanBuilder(Name.UNDERFS_OBJECT_STORE_BREADCRUMBS_ENABLED) .setDefaultValue(true) @@ -1225,12 +1297,14 @@ public String toString() { .build(); public static final PropertyKey UNDERFS_EVENTUAL_CONSISTENCY_RETRY_MAX_NUM = intBuilder(Name.UNDERFS_EVENTUAL_CONSISTENCY_RETRY_MAX_NUM) - .setDefaultValue(20) + .setDefaultValue(0) .setDescription("To handle eventually consistent storage semantics " + "for certain under storages, Alluxio will perform retries " + "when under storage metadata doesn't match Alluxio's expectations. " + "These retries use exponential backoff. " - + "This property determines the maximum number of retries.") + + "This property determines the maximum number of retries. 
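For the eventual-consistency retries described above, a sketch of capped exponential backoff driven by a maximum retry count may help; with the new default of 0 the loop gives up immediately (helper class hypothetical, not Alluxio's actual retry utility):

public class EventualConsistencyRetryDemo {
  public static void main(String[] args) throws InterruptedException {
    int maxRetries = 0;    // mirrors the new default: no retries
    long baseSleepMs = 50;
    long maxSleepMs = 5_000;
    for (int attempt = 0; ; attempt++) {
      boolean metadataMatches = attempt >= 2; // stand-in for a UFS check
      if (metadataMatches) {
        System.out.println("succeeded on attempt " + attempt);
        break;
      }
      if (attempt >= maxRetries) {
        System.out.println("giving up after " + attempt + " retries");
        break;
      }
      // Exponential backoff: double the sleep each retry, up to a cap.
      long sleepMs = Math.min(maxSleepMs, baseSleepMs << Math.min(attempt, 20));
      Thread.sleep(sleepMs);
    }
  }
}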
" + + "This property defaults to 0 as modern object store UFSs provide strong " + + "consistency.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.SERVER) .build(); @@ -1275,6 +1349,46 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.SERVER) .build(); + public static final PropertyKey UNDERFS_OSS_ECS_RAM_ROLE = + stringBuilder(Name.UNDERFS_OSS_ECS_RAM_ROLE) + .setAlias("alluxio.underfs.oss.ecs.ram.role") + .setDescription("The RAM role of current owner of ECS.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_RETRY_MAX = + intBuilder(Name.UNDERFS_OSS_RETRY_MAX) + .setAlias("alluxio.underfs.oss.retry.max") + .setDefaultValue(3) + .setDescription("The maximum number of OSS error retry.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_STS_ECS_METADATA_SERVICE_ENDPOINT = + stringBuilder(Name.UNDERFS_OSS_STS_ECS_METADATA_SERVICE_ENDPOINT) + .setAlias("alluxio.underfs.oss.sts.ecs.metadata.service.endpoint") + .setDefaultValue("http://100.100.100.200/latest/meta-data/ram/security-credentials/") + .setDescription("The ECS metadata service endpoint for Aliyun STS") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_STS_ENABLED = + booleanBuilder(Name.UNDERFS_OSS_STS_ENABLED) + .setAlias("alluxio.underfs.oss.sts.enabled") + .setDefaultValue(false) + .setDescription("Whether to enable oss STS(Security Token Service).") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_STS_TOKEN_REFRESH_INTERVAL_MS = + durationBuilder(Name.UNDERFS_OSS_STS_TOKEN_REFRESH_INTERVAL_MS) + .setAlias("alluxio.underfs.oss.sts.token.refresh.interval.ms") + .setDefaultValue("30m") + .setDescription("Time before an OSS Security Token is considered expired " + + "and will be automatically renewed") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey UNDERFS_S3_ADMIN_THREADS_MAX = intBuilder(Name.UNDERFS_S3_ADMIN_THREADS_MAX) .setDefaultValue(20) @@ -1660,6 +1774,56 @@ public String toString() { .setScope(Scope.SERVER) .setDisplayType(DisplayType.CREDENTIALS) .build(); + public static final PropertyKey UNDERFS_OSS_INTERMEDIATE_UPLOAD_CLEAN_AGE = + durationBuilder(Name.UNDERFS_OSS_INTERMEDIATE_UPLOAD_CLEAN_AGE) + .setDefaultValue("3day") + .setDescription("Streaming uploads may not have been completed/aborted correctly " + + "and need periodical ufs cleanup. If ufs cleanup is enabled, " + + "intermediate multipart uploads in all non-readonly OSS mount points " + + "older than this age will be cleaned. 
This may impact other " + + "ongoing upload operations, so a large clean age is encouraged.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_STREAMING_UPLOAD_ENABLED = + booleanBuilder(Name.UNDERFS_OSS_STREAMING_UPLOAD_ENABLED) + .setDefaultValue(false) + .setDescription("(Experimental) If true, using streaming upload to write to OSS.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_STREAMING_UPLOAD_PARTITION_SIZE = + dataSizeBuilder(Name.UNDERFS_OSS_STREAMING_UPLOAD_PARTITION_SIZE) + .setDefaultValue("64MB") + .setDescription("Maximum allowable size of a single buffer file when using " + + "OSS streaming upload. When the buffer file reaches the partition size, " + + "it will be uploaded and the upcoming data will write to other buffer files." + + "If the partition size is too small, OSS upload speed might be affected. ") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_STREAMING_UPLOAD_THREADS = + intBuilder(Name.UNDERFS_OSS_STREAMING_UPLOAD_THREADS) + .setDefaultValue(20) + .setDescription("the number of threads to use for streaming upload data to OSS.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_DEFAULT_MODE = + stringBuilder(Name.UNDERFS_OSS_DEFAULT_MODE) + .setAlias("alluxio.underfs.oss.default.mode") + .setDefaultValue("0700") + .setDescription("Mode (in octal notation) for OSS objects if mode cannot be discovered.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_OWNER_ID_TO_USERNAME_MAPPING = + stringBuilder(Name.UNDERFS_OSS_OWNER_ID_TO_USERNAME_MAPPING) + .setDescription("Optionally, specify a preset oss canonical id to Alluxio username " + + "static mapping, in the format \"id1=user1;id2=user2\". ") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey S3A_ACCESS_KEY = stringBuilder(Name.S3A_ACCESS_KEY) .setAlias(Name.AWS_ACCESS_KEY) .setDescription("The access key of S3 bucket.") @@ -1811,6 +1975,41 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.SERVER) .build(); + public static final PropertyKey UNDERFS_OBS_INTERMEDIATE_UPLOAD_CLEAN_AGE = + durationBuilder(Name.UNDERFS_OBS_INTERMEDIATE_UPLOAD_CLEAN_AGE) + .setDefaultValue("3day") + .setDescription("Streaming uploads may not have been completed/aborted correctly " + + "and need periodical ufs cleanup. If ufs cleanup is enabled, " + + "intermediate multipart uploads in all non-readonly OBS mount points " + + "older than this age will be cleaned. 
This may impact other " + + "ongoing upload operations, so a large clean age is encouraged.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OBS_STREAMING_UPLOAD_ENABLED = + booleanBuilder(Name.UNDERFS_OBS_STREAMING_UPLOAD_ENABLED) + .setDefaultValue(false) + .setDescription("(Experimental) If true, using streaming upload to write to OBS.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OBS_STREAMING_UPLOAD_PARTITION_SIZE = + dataSizeBuilder(Name.UNDERFS_OBS_STREAMING_UPLOAD_PARTITION_SIZE) + .setDefaultValue("64MB") + .setDescription("Maximum allowable size of a single buffer file when using " + + "S3A streaming upload. When the buffer file reaches the partition size, " + + "it will be uploaded and the upcoming data will write to other buffer files." + + "If the partition size is too small, OBS upload speed might be affected. ") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OBS_STREAMING_UPLOAD_THREADS = + intBuilder(Name.UNDERFS_OBS_STREAMING_UPLOAD_THREADS) + .setDefaultValue(20) + .setDescription("the number of threads to use for streaming upload data to OBS.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); // // Mount table related properties // @@ -2217,6 +2416,28 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) .build(); + public static final PropertyKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_TYPE = + enumBuilder(Name.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_TYPE, + DirectoryMarshaller.Type.class) + .setDefaultValue(DirectoryMarshaller.Type.NO_COMPRESSION) + .setDescription("The type of compression to use when transferring a snapshot from one " + + "master to another. Options are NO_COMPRESSION, GZIP, TAR_GZIP") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.MASTER) + .build(); + public static final PropertyKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL = + intBuilder(Name.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL) + .setAlias(Name.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_LEVEL) + .setDefaultValue(1) + .setDescription("The zip compression level of sending a snapshot from one master to " + + "another. Only applicable when " + + "alluxio.master.embedded.journal.snapshot.replication.compression.type is not " + + "NO_COMPRESSION. The zip format defines ten levels of compression, ranging from 0 " + + "(no compression, but very fast) to 9 (best compression, but slow). " + + "Or -1 for the system default compression level.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); public static final PropertyKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_CHUNK_SIZE = dataSizeBuilder(Name.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_CHUNK_SIZE) .setDefaultValue("4MB") @@ -2274,6 +2495,24 @@ public String toString() { + "the master addresses.") .setScope(Scope.ALL) .build(); + public static final PropertyKey MASTER_FAILOVER_COLLECT_INFO = + booleanBuilder(Name.MASTER_FAILOVER_COLLECT_INFO) + .setDefaultValue(true) + .setDescription("If true, the primary master will persist metrics and jstack into " + + "the log folder when it transitions to standby. 
") + .setScope(Scope.MASTER) + .build(); + + public static final PropertyKey MASTER_FILE_ACCESS_TIME_UPDATER_ENABLED = + booleanBuilder(Name.MASTER_FILE_ACCESS_TIME_UPDATER_ENABLED) + .setDefaultValue(true) + .setDescription("If enabled, file access time updater will update the file last " + + "access time when an inode is accessed. This property can be turned off to improve " + + "performance and reduce the number of journal entries if your application does " + + "not rely on the file access time metadata.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); public static final PropertyKey MASTER_FILE_ACCESS_TIME_JOURNAL_FLUSH_INTERVAL = durationBuilder(Name.MASTER_FILE_ACCESS_TIME_JOURNAL_FLUSH_INTERVAL) .setDefaultValue("1h") @@ -2382,19 +2621,35 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) .build(); + public static final PropertyKey MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_TYPE = + enumBuilder(Name.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_TYPE, CompressionType.class) + // default value informed by https://github.com/facebook/rocksdb/wiki/Compression + .setDefaultValue(CompressionType.LZ4_COMPRESSION) + .setDescription("The compression algorithm that RocksDB uses internally. One of " + + "{NO_COMPRESSION SNAPPY_COMPRESSION ZLIB_COMPRESSION BZLIB2_COMPRESSION " + + "LZ4_COMPRESSION LZ4HC_COMPRESSION XPRESS_COMPRESSION ZSTD_COMPRESSION " + + "DISABLE_COMPRESSION_OPTION}") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); + public static final PropertyKey MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT = + durationBuilder(Name.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT) + .setDefaultValue("10s") + .setIsHidden(true) + .setDescription("Before RocksDB is shut down/restarted/restored, Master will wait for " + + "ongoing operations to complete/abort. This timeout specifies how long to wait " + + "before forcing the action. Then the leftover operations will fail. Normally the " + + "wait will be short, because when master fails over/shuts down/replays journal, " + + "all other concurrent operations should have been stopped. This is just one extra " + + "safety guard. 
Therefore we do not recommend setting this manually.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); public static final PropertyKey MASTER_METASTORE_ROCKS_PARALLEL_BACKUP = booleanBuilder(Name.MASTER_METASTORE_ROCKS_PARALLEL_BACKUP) .setDefaultValue(false) - .setDescription("Whether to backup rocksdb in parallel") - .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) - .setScope(Scope.MASTER) - .build(); - public static final PropertyKey MASTER_METASTORE_ROCKS_PARALLEL_BACKUP_COMPRESSION_LEVEL = - intBuilder(Name.MASTER_METASTORE_ROCKS_PARALLEL_BACKUP_COMPRESSION_LEVEL) - .setDefaultValue(6) - .setDescription("The zip compression level of backing up rocksdb in parallel, the zip" - + " format defines ten levels of compression, ranging from 0" - + " (no compression, but very fast) to 9 (best compression, but slow)") + .setDescription(format("Whether to checkpoint rocksdb in parallel using the number of" + + " threads set by %s.", Name.MASTER_METASTORE_ROCKS_PARALLEL_BACKUP_THREADS)) .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) .build(); @@ -2884,7 +3139,7 @@ public String toString() { + "UFS), EMBEDDED (use a journal embedded in the masters), and NOOP (do not use a " + "journal)") .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) - .setScope(Scope.MASTER) + .setScope(Scope.ALL) .build(); public static final PropertyKey MASTER_JOURNAL_LOG_SIZE_BYTES_MAX = dataSizeBuilder(Name.MASTER_JOURNAL_LOG_SIZE_BYTES_MAX) @@ -2894,14 +3149,6 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) .build(); - public static final PropertyKey MASTER_JOURNAL_LOG_CONCURRENCY_MAX = - intBuilder(Name.MASTER_JOURNAL_LOG_CONCURRENCY_MAX) - .setDefaultValue(256) - .setDescription("Max concurrency for notifyTermIndexUpdated method, be sure it's " - + "enough") - .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) - .setScope(Scope.MASTER) - .build(); public static final PropertyKey MASTER_JOURNAL_REQUEST_DATA_TIMEOUT = durationBuilder(Name.MASTER_JOURNAL_REQUEST_DATA_TIMEOUT) .setDefaultValue(20000) @@ -2911,7 +3158,7 @@ public String toString() { .build(); public static final PropertyKey MASTER_JOURNAL_REQUEST_INFO_TIMEOUT = durationBuilder(Name.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT) - .setDefaultValue(20000) + .setDefaultValue(10_000) .setDescription("Time to wait for follower to respond to request to get information" + " about its latest snapshot") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) @@ -3087,6 +3334,32 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.MASTER) .build(); + public static final PropertyKey MASTER_PROXY_TIMEOUT_MS = + durationBuilder(Name.MASTER_PROXY_TIMEOUT_MS) + .setAlias("alluxio.master.proxy.timeout.ms") + .setDefaultValue("5m") + .setDescription("An Alluxio Proxy instance will maintain heartbeat to the primary " + + "Alluxio Master. No heartbeat more than this timeout indicates a lost Proxy.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); + public static final PropertyKey MASTER_PROXY_CHECK_HEARTBEAT_INTERVAL = + durationBuilder(Name.MASTER_PROXY_CHECK_HEARTBEAT_INTERVAL) + .setDefaultValue("1min") + .setDescription("The master will periodically check the last heartbeat time from all " + + "Proxy instances. 
This key specifies the frequency of the check.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); + public static final PropertyKey MASTER_LOST_PROXY_DELETION_TIMEOUT_MS = + durationBuilder(Name.MASTER_LOST_PROXY_DELETION_TIMEOUT_MS) + .setAlias("alluxio.master.lost.proxy.deletion.timeout.ms") + .setDefaultValue("30min") + .setDescription("If an Alluxio Proxy has been lost for more than this timeout, " + + "the master will completely forget this Proxy.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); public static final PropertyKey MASTER_RPC_PORT = intBuilder(Name.MASTER_RPC_PORT) .setAlias("alluxio.master.port") @@ -3124,6 +3397,14 @@ public String toString() { + "if this property is true. This property is available since 1.7.1") .setScope(Scope.MASTER) .build(); + public static final PropertyKey MASTER_STATE_LOCK_ERROR_THRESHOLD = + intBuilder(Name.MASTER_STATE_LOCK_ERROR_THRESHOLD) + .setDefaultValue(20) + .setDescription("Used to trace and debug state lock issues. When a thread recursively " + + "acquires the state lock more than this threshold, log an error for further debugging.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.IGNORE) + .setScope(Scope.MASTER) + .build(); public static final PropertyKey MASTER_TIERED_STORE_GLOBAL_LEVEL0_ALIAS = stringBuilder(Name.MASTER_TIERED_STORE_GLOBAL_LEVEL0_ALIAS) .setDefaultValue(Constants.MEDIUM_MEM) @@ -3475,6 +3756,45 @@ public String toString() { .setIsHidden(true) .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .build(); + public static final PropertyKey + MASTER_METADATA_SYNC_GET_DIRECTORY_STATUS_SKIP_LOADING_CHILDREN = + booleanBuilder(Name.MASTER_METADATA_SYNC_GET_DIRECTORY_STATUS_SKIP_LOADING_CHILDREN) + .setDescription( + "If set to true, skip loading children during metadata sync when " + + "descendant type is set to NONE, for example, a metadata sync triggered " + + "by a getStatus on a directory.") + .setScope(Scope.MASTER) + .setDefaultValue(true) + .setIsHidden(true) + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .build(); + public static final PropertyKey MASTER_METADATA_SYNC_UFS_CONCURRENT_GET_STATUS = + booleanBuilder(Name.MASTER_METADATA_SYNC_UFS_CONCURRENT_GET_STATUS) + .setDefaultValue(true) + .setDescription("Allows metadata sync operations on single items (i.e. getStatus) " + + "to run concurrently with metadata sync operations on directories " + + "(i.e. listings) on intersecting paths.") + .setScope(Scope.MASTER) + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .build(); + public static final PropertyKey MASTER_METADATA_SYNC_UFS_CONCURRENT_LISTING = + booleanBuilder(Name.MASTER_METADATA_SYNC_UFS_CONCURRENT_LISTING) + .setDefaultValue(true) + .setDescription("Allows non-recursive metadata sync operations on directories " + + "to run concurrently with recursive metadata sync operations on " + + "intersecting paths.") + .setScope(Scope.MASTER) + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .build(); + public static final PropertyKey MASTER_METADATA_SYNC_UFS_CONCURRENT_LOADS = + intBuilder(Name.MASTER_METADATA_SYNC_UFS_CONCURRENT_LOADS) + .setDefaultValue(100) + .setDescription("The number of concurrently running UFS listing operations " + + "during metadata sync. 
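A sketch of the kind of recursion guard the state-lock threshold above suggests: count per-thread acquisitions and log once the depth crosses the configured limit. This is a hypothetical illustration, not the master's actual StateLockManager logic:

import java.util.concurrent.locks.ReentrantReadWriteLock;

public class StateLockDepthGuard {
  private static final int ERROR_THRESHOLD = 20; // mirrors the new default
  private final ReentrantReadWriteLock mLock = new ReentrantReadWriteLock();
  private final ThreadLocal<Integer> mDepth = ThreadLocal.withInitial(() -> 0);

  public AutoCloseable lockShared() {
    mLock.readLock().lock();
    int depth = mDepth.get() + 1;
    mDepth.set(depth);
    if (depth > ERROR_THRESHOLD) {
      // In the real master this would be LOG.error with a stack trace.
      System.err.println("state lock acquired recursively " + depth + " times");
    }
    return () -> {
      mDepth.set(mDepth.get() - 1);
      mLock.readLock().unlock();
    };
  }

  public static void main(String[] args) throws Exception {
    StateLockDepthGuard guard = new StateLockDepthGuard();
    try (AutoCloseable lock = guard.lockShared()) {
      System.out.println("holding state lock");
    }
  }
}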
This includes loads that have completed, but " + + "have not yet been processed.") + .setScope(Scope.MASTER) + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .build(); // In Java8 in container environment Runtime.availableProcessors() always returns 1, // which is not the actual number of cpus, so we set a safe default value 32. public static final PropertyKey MASTER_METADATA_SYNC_UFS_PREFETCH_POOL_SIZE = @@ -3514,6 +3834,14 @@ public String toString() { .setScope(Scope.MASTER) .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .build(); + public static final PropertyKey MASTER_METADATA_SYNC_UFS_RATE_LIMIT = + longBuilder(Name.MASTER_METADATA_SYNC_UFS_RATE_LIMIT) + .setDescription("The maximum number of operations per second to execute " + + "on an individual UFS during metadata sync operations. If 0 or unset " + + "then no rate limit is enforced.") + .setScope(Scope.MASTER) + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .build(); public static final PropertyKey MASTER_METADATA_SYNC_IGNORE_TTL = booleanBuilder(Name.MASTER_METADATA_SYNC_IGNORE_TTL) .setDefaultValue(false) @@ -3660,6 +3988,15 @@ public String toString() { .setDescription("Whether a standby master runs a web server") .setScope(Scope.SERVER) .build(); + public static final PropertyKey STANDBY_MASTER_GRPC_ENABLED = + booleanBuilder(Name.STANDBY_MASTER_GRPC_ENABLED) + .setDefaultValue(true) + .setIsHidden(true) + .setDescription("Whether a standby master runs a grpc server. WARNING: disabling this " + + "will prevent master snapshotting from working correctly.") + .setScope(Scope.ALL) + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .build(); // // Throttle @@ -4345,6 +4682,17 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.WORKER) .build(); + public static final PropertyKey WORKER_REGISTER_TO_ALL_MASTERS = + booleanBuilder(Name.WORKER_REGISTER_TO_ALL_MASTERS) + .setDefaultValue(false) + .setDescription("If enabled, workers will register themselves to all masters, " + + "instead of primary master only. This can be used to save the " + + "master failover time because the new primary immediately knows " + + "all existing workers and blocks. Can only be enabled when " + + Name.STANDBY_MASTER_GRPC_ENABLED + " is turned on.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.ALL) + .build(); public static final PropertyKey WORKER_REMOTE_IO_SLOW_THRESHOLD = durationBuilder(Name.WORKER_REMOTE_IO_SLOW_THRESHOLD) .setDefaultValue("10s") @@ -4364,7 +4712,20 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.WORKER) .build(); - + public static final PropertyKey WORKER_BLOCK_HEARTBEAT_REPORT_SIZE_THRESHOLD = + intBuilder(Name.WORKER_BLOCK_HEARTBEAT_REPORT_SIZE_THRESHOLD) + .setDefaultValue(1_000_000) + .setDescription( + "When " + Name.WORKER_REGISTER_TO_ALL_MASTERS + "=true, " + + "because a worker will send block reports to all masters, " + + "we use a threshold to limit the unsent block report size in worker's memory. " + + "If the worker block heartbeat is larger than the threshold, " + + "we discard the heartbeat message and force " + + "the worker to register with that master with a full report." 
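The UFS rate limit above caps operations per second during metadata sync, with 0 or unset meaning unlimited. One plausible shape for such a limiter, using Guava's RateLimiter (Guava is already a dependency of this module; the surrounding orchestration is hypothetical):

import com.google.common.util.concurrent.RateLimiter;

public class UfsRateLimitDemo {
  public static void main(String[] args) {
    // Hypothetical stand-in for alluxio.master.metadata.sync.ufs.rate.limit:
    // at most 100 UFS operations per second; <= 0 disables limiting.
    long rateLimit = 100;
    RateLimiter limiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
    for (int i = 0; i < 5; i++) {
      if (limiter != null) {
        limiter.acquire(); // blocks until a permit is available
      }
      System.out.println("listStatus call " + i);
    }
  }
}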
+ ) + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.WORKER) + .build(); public static final PropertyKey WORKER_PAGE_STORE_ASYNC_RESTORE_ENABLED = booleanBuilder(Name.WORKER_PAGE_STORE_ASYNC_RESTORE_ENABLED) .setDefaultValue(true) @@ -4816,14 +5177,6 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.WORKER) .build(); - public static final PropertyKey WORKER_UFS_BLOCK_OPEN_TIMEOUT_MS = - durationBuilder(Name.WORKER_UFS_BLOCK_OPEN_TIMEOUT_MS) - .setAlias("alluxio.worker.ufs.block.open.timeout.ms") - .setDefaultValue("5min") - .setDescription("Timeout to open a block from UFS.") - .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) - .setScope(Scope.WORKER) - .build(); public static final PropertyKey WORKER_UFS_INSTREAM_CACHE_ENABLED = booleanBuilder(Name.WORKER_UFS_INSTREAM_CACHE_ENABLED) .setDefaultValue(true) @@ -4956,6 +5309,15 @@ public String toString() { // // Proxy related properties // + public static final PropertyKey PROXY_MASTER_HEARTBEAT_INTERVAL = + durationBuilder(Name.PROXY_MASTER_HEARTBEAT_INTERVAL) + .setAlias("alluxio.proxy.master.heartbeat.interval.ms") + .setDefaultValue("10sec") + .setDescription("Proxy instances maintain a heartbeat with the primary master. " + + "This key specifies the heartbeat interval.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey PROXY_S3_WRITE_TYPE = enumBuilder(Name.PROXY_S3_WRITE_TYPE, WriteType.class) .setDefaultValue(WriteType.CACHE_THROUGH) @@ -4981,8 +5343,8 @@ public String toString() { .build(); public static final PropertyKey PROXY_S3_MULTIPART_UPLOAD_CLEANER_ENABLED = booleanBuilder(Name.PROXY_S3_MULTIPART_UPLOAD_CLEANER_ENABLED) - .setDefaultValue(true) - .setDescription("Whether or not to enable automatic cleanup of long-running " + .setDefaultValue(false) + .setDescription("Enable automatic cleanup of long-running " + "multipart uploads.") .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.SERVER) @@ -5077,6 +5439,65 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.SERVER) .build(); + public static final PropertyKey PROXY_S3_V2_VERSION_ENABLED = + booleanBuilder(Name.PROXY_S3_V2_VERSION_ENABLED) + .setDefaultValue(true) + .setDescription("(Experimental) V2, an optimized version of " + + "Alluxio s3 proxy service.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_PROCESSING_ENABLED = + booleanBuilder(Name.PROXY_S3_V2_ASYNC_PROCESSING_ENABLED) + .setDefaultValue(false) + .setDescription("(Experimental) If enabled, handle S3 request " + + "in async mode when v2 version of Alluxio s3 " + + "proxy service is enabled.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_LIGHT_POOL_CORE_THREAD_NUMBER = + intBuilder(Name.PROXY_S3_V2_ASYNC_LIGHT_POOL_CORE_THREAD_NUMBER) + .setDefaultValue(8) + .setDescription("Core thread number for async light thread pool.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_LIGHT_POOL_MAXIMUM_THREAD_NUMBER = + intBuilder(Name.PROXY_S3_V2_ASYNC_LIGHT_POOL_MAXIMUM_THREAD_NUMBER) + .setDefaultValue(64) + .setDescription("Maximum thread number for async light thread pool.") + 
.setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_LIGHT_POOL_QUEUE_SIZE = + intBuilder(Name.PROXY_S3_V2_ASYNC_LIGHT_POOL_QUEUE_SIZE) + .setDefaultValue(64 * 1024) + .setDescription("Queue size for async light thread pool.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_HEAVY_POOL_CORE_THREAD_NUMBER = + intBuilder(Name.PROXY_S3_V2_ASYNC_HEAVY_POOL_CORE_THREAD_NUMBER) + .setDefaultValue(8) + .setDescription("Core thread number for async heavy thread pool.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_HEAVY_POOL_MAXIMUM_THREAD_NUMBER = + intBuilder(Name.PROXY_S3_V2_ASYNC_HEAVY_POOL_MAXIMUM_THREAD_NUMBER) + .setDefaultValue(64) + .setDescription("Maximum thread number for async heavy thread pool.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_HEAVY_POOL_QUEUE_SIZE = + intBuilder(Name.PROXY_S3_V2_ASYNC_HEAVY_POOL_QUEUE_SIZE) + .setDefaultValue(64 * 1024) + .setDescription("Queue size for async heavy thread pool.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey PROXY_STREAM_CACHE_TIMEOUT_MS = durationBuilder(Name.PROXY_STREAM_CACHE_TIMEOUT_MS) .setAlias("alluxio.proxy.stream.cache.timeout.ms") @@ -5113,6 +5534,34 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.SERVER) .build(); + public static final PropertyKey PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS = + durationBuilder(Name.PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS) + .setAlias("alluxio.proxy.s3.bucketpathcache.timeout.ms") + .setDefaultValue("0min") + .setDescription("Expire bucket path statistics in cache for this time period. " + + "Set 0min to disable the cache. If enabling the cache, " + + "be careful that Alluxio S3 API will behave differently from AWS S3 API" + + " if bucket path cache entries become stale.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.IGNORE) + .setScope(Scope.NONE) + .build(); + public static final PropertyKey PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB = + intBuilder(Name.PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB) + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setDescription("Limit the maximum read speed for each connection. " + + "Set value less than or equal to 0 to disable rate limits.") + .setDefaultValue(0) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_GLOBAL_READ_RATE_LIMIT_MB = + intBuilder(Name.PROXY_S3_GLOBAL_READ_RATE_LIMIT_MB) + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setDescription("Limit the maximum read speed for all connections. 
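The light/heavy pool properties above map naturally onto java.util.concurrent.ThreadPoolExecutor parameters. A sketch wiring the new defaults together (pool construction here is illustrative, not the proxy's actual bootstrap code):

import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class S3ProxyPools {
  public static void main(String[] args) {
    // Shape mirrors the new defaults: 8 core / 64 max threads and a
    // 64 * 1024 entry queue (keep-alive chosen arbitrarily for the sketch).
    ThreadPoolExecutor lightPool = new ThreadPoolExecutor(
        8, 64, 60, TimeUnit.SECONDS, new LinkedBlockingQueue<>(64 * 1024));
    lightPool.submit(() -> System.out.println("light task"));
    lightPool.shutdown();
    // Design note: with a bounded LinkedBlockingQueue, threads beyond the
    // core count are only created once the queue fills, so the maximum
    // thread count is a backstop rather than a steady-state size.
  }
}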
" + + "Set value less than or equal to 0 to disable rate limits.") + .setDefaultValue(0) + .setScope(Scope.SERVER) + + .build(); // // Locality related properties @@ -5384,6 +5833,12 @@ public String toString() { + "before attempting to delete persisted directories recursively.") .setScope(Scope.CLIENT) .build(); + public static final PropertyKey USER_FILE_DIRECT_ACCESS = + listBuilder(Name.USER_FILE_DIRECT_ACCESS) + .setDescription("A list of Alluxio paths that are not read or write cached and " + + "always fetches from the ufs for the latest listing") + .setScope(Scope.CLIENT) + .build(); public static final PropertyKey USER_FILE_MASTER_CLIENT_POOL_SIZE_MIN = intBuilder(Name.USER_FILE_MASTER_CLIENT_POOL_SIZE_MIN) .setDefaultValue(0) @@ -5500,9 +5955,9 @@ public String toString() { .build(); public static final PropertyKey USER_FILE_CREATE_TTL_ACTION = enumBuilder(Name.USER_FILE_CREATE_TTL_ACTION, TtlAction.class) - .setDefaultValue(TtlAction.DELETE) + .setDefaultValue(TtlAction.FREE) .setDescription("When file's ttl is expired, the action performs on it. Options: " - + "DELETE (default) or FREE") + + "FREE(default), DELETE_ALLUXIO or DELETE") .setScope(Scope.CLIENT) .build(); public static final PropertyKey USER_FILE_UFS_TIER_ENABLED = @@ -5753,6 +6208,27 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.CLIENT) .build(); + public static final PropertyKey USER_CLIENT_CACHE_TTL_ENABLED = + booleanBuilder(Name.USER_CLIENT_CACHE_TTL_ENABLED) + .setDefaultValue(false) + .setDescription("Whether to support cache quota.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.CLIENT) + .build(); + public static final PropertyKey USER_CLIENT_CACHE_TTL_CHECK_INTERVAL_SECONDS = + longBuilder(Name.USER_CLIENT_CACHE_TTL_CHECK_INTERVAL_SECONDS) + .setDefaultValue(3600) + .setDescription("TTL check interval time in seconds.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.IGNORE) + .setScope(Scope.CLIENT) + .build(); + public static final PropertyKey USER_CLIENT_CACHE_TTL_THRESHOLD_SECONDS = + longBuilder(Name.USER_CLIENT_CACHE_TTL_THRESHOLD_SECONDS) + .setDefaultValue(3600 * 3) + .setDescription("TTL threshold time in seconds.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.IGNORE) + .setScope(Scope.CLIENT) + .build(); public static final PropertyKey USER_CLIENT_CACHE_SIZE = listBuilder(Name.USER_CLIENT_CACHE_SIZE) .setDefaultValue("512MB") @@ -5774,6 +6250,23 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.CLIENT) .build(); + public static final PropertyKey USER_CLIENT_CACHE_IDENTIFIER_INCLUDE_MTIME = + booleanBuilder(Name.USER_CLIENT_CACHE_IDENTIFIER_INCLUDE_MTIME) + .setDefaultValue(false) + .setDescription("If this is enabled, client-side cache will include modification time " + + "while calculating the identifier of a file.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.CLIENT) + .build(); + + public static final PropertyKey USER_CLIENT_REPORT_VERSION_ENABLED = + booleanBuilder(Name.USER_CLIENT_REPORT_VERSION_ENABLED) + .setDefaultValue(false) + .setDescription("Whether the client reports version information to the server.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.CLIENT) + .build(); + public static final PropertyKey USER_FILE_WRITE_TYPE_DEFAULT = enumBuilder(Name.USER_FILE_WRITE_TYPE_DEFAULT, WriteType.class) .setDefaultValue(WriteType.ASYNC_THROUGH) @@ -5831,6 +6324,14 @@ public String toString() { + 
"when Alluxio workers are required but not ready.") .setScope(Scope.CLIENT) .build(); + public static final PropertyKey USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS = + booleanBuilder(Name.USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS) + .setDefaultValue(false) + .setDescription("If enabled, the mount info will be excluded from the response " + + "when a HDFS client calls alluxio to list status on a directory.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.IGNORE) + .setScope(Scope.CLIENT) + .build(); public static final PropertyKey USER_LOCAL_READER_CHUNK_SIZE_BYTES = dataSizeBuilder(Name.USER_LOCAL_READER_CHUNK_SIZE_BYTES) .setDefaultValue("8MB") @@ -5859,6 +6360,13 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.CLIENT) .build(); + public static final PropertyKey USER_MASTER_POLLING_CONCURRENT = + booleanBuilder(Name.USER_MASTER_POLLING_CONCURRENT) + .setDefaultValue(false) + .setDescription("Whether to concurrently polling the master.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.CLIENT) + .build(); public static final PropertyKey USER_METADATA_CACHE_ENABLED = booleanBuilder(Name.USER_METADATA_CACHE_ENABLED) .setDefaultValue(false) @@ -6341,6 +6849,8 @@ public String toString() { .setDescription(format("When an Alluxio client reads a file from the UFS, it " + "delegates the read to an Alluxio worker. The client uses this policy to choose " + "which worker to read through. Built-in choices: %s.", Arrays.asList( + javadocLink("alluxio.client.block.policy.CapacityBasedDeterministicHashPolicy"), + javadocLink("alluxio.client.block.policy.CapacityBaseRandomPolicy"), javadocLink("alluxio.client.block.policy.DeterministicHashPolicy"), javadocLink("alluxio.client.block.policy.LocalFirstAvoidEvictionPolicy"), javadocLink("alluxio.client.block.policy.LocalFirstPolicy"), @@ -6354,8 +6864,9 @@ public String toString() { intBuilder(Name.USER_UFS_BLOCK_READ_LOCATION_POLICY_DETERMINISTIC_HASH_SHARDS) .setDefaultValue(1) .setDescription("When alluxio.user.ufs.block.read.location.policy is set to " - + "alluxio.client.block.policy.DeterministicHashPolicy, this specifies the number of " - + "hash shards.") + + "alluxio.client.block.policy.DeterministicHashPolicy or " + + "alluxio.client.block.policy.CapacityBasedDeterministicHashPolicy, " + + "this specifies the number of hash shards.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.CLIENT) .build(); @@ -6792,10 +7303,25 @@ public String toString() { // TODO(ns) Fix default value to handle other UFS types public static final PropertyKey UNDERFS_VERSION = stringBuilder(Name.UNDERFS_VERSION) - .setDefaultValue("3.3.1") + .setDefaultValue("3.3.4") .setIsHidden(true) .build(); + // new job service + public static final PropertyKey JOB_BATCH_SIZE = + intBuilder(Name.JOB_BATCH_SIZE) + .setDescription("The number of tasks would be included in a job request.") + .setDefaultValue(20) + .setScope(Scope.MASTER) + .build(); + public static final PropertyKey JOB_RETENTION_TIME = + durationBuilder(Name.JOB_RETENTION_TIME) + .setDescription("The length of time the Alluxio should save information about " + + "completed jobs before they are discarded.") + .setDefaultValue("1d") + .setScope(Scope.MASTER) + .build(); + // // Job service // @@ -6830,6 +7356,20 @@ public String toString() { .setDefaultValue(100000) .setScope(Scope.MASTER) .build(); + public static final PropertyKey JOB_MASTER_MASTER_HEARTBEAT_INTERVAL = + 
durationBuilder(Name.JOB_MASTER_MASTER_HEARTBEAT_INTERVAL) + .setDescription("The amount of time that a standby Alluxio Job Master should wait " + + "in between heartbeats to the primary Job Master.") + .setDefaultValue("1sec") + .setScope(Scope.MASTER) + .build(); + public static final PropertyKey JOB_MASTER_MASTER_TIMEOUT = + durationBuilder(Name.JOB_MASTER_MASTER_TIMEOUT) + .setDescription("The time period after which the primary Job Master will mark a standby " + + "as lost without a subsequent heartbeat.") + .setDefaultValue("60sec") + .setScope(Scope.MASTER) + .build(); public static final PropertyKey JOB_MASTER_WORKER_HEARTBEAT_INTERVAL = durationBuilder(Name.JOB_MASTER_WORKER_HEARTBEAT_INTERVAL) .setDescription("The amount of time that the Alluxio job worker should wait in between " @@ -6856,6 +7396,13 @@ public String toString() { .setDefaultValue(format("${%s}", Name.MASTER_HOSTNAME)) .setScope(Scope.ALL) .build(); + public static final PropertyKey JOB_MASTER_LOST_MASTER_INTERVAL = + durationBuilder(Name.JOB_MASTER_LOST_MASTER_INTERVAL) + .setDescription("The time interval the job master waits between checks for " + + "lost job masters.") + .setDefaultValue("10sec") + .setScope(Scope.MASTER) + .build(); public static final PropertyKey JOB_MASTER_LOST_WORKER_INTERVAL = durationBuilder(Name.JOB_MASTER_LOST_WORKER_INTERVAL) .setDescription("The time interval the job master waits between checks for lost workers.") @@ -6890,7 +7437,7 @@ public String toString() { intBuilder(Name.JOB_REQUEST_BATCH_SIZE) .setDescription("The batch size client uses to make requests to the " + "job master.") - .setDefaultValue(20) + .setDefaultValue(1) .setScope(Scope.CLIENT) .build(); public static final PropertyKey JOB_WORKER_BIND_HOST = @@ -7078,7 +7625,6 @@ public String toString() { .setDescription("(Experimental) Enables the table service.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_CATALOG_PATH = stringBuilder(Name.TABLE_CATALOG_PATH) @@ -7086,7 +7632,6 @@ public String toString() { .setDescription("The Alluxio file path for the table catalog metadata.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_CATALOG_UDB_SYNC_TIMEOUT = durationBuilder(Name.TABLE_CATALOG_UDB_SYNC_TIMEOUT) @@ -7095,7 +7640,6 @@ public String toString() { + "takes longer than this timeout, the sync will be terminated.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_JOURNAL_PARTITIONS_CHUNK_SIZE = intBuilder(Name.TABLE_JOURNAL_PARTITIONS_CHUNK_SIZE) @@ -7103,7 +7647,6 @@ public String toString() { .setDescription("The maximum table partitions number in a single journal entry.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL = durationBuilder(Name.TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL) @@ -7114,7 +7657,6 @@ public String toString() { + "locations after transformation.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_TRANSFORM_MANAGER_JOB_HISTORY_RETENTION_TIME = durationBuilder(Name.TABLE_TRANSFORM_MANAGER_JOB_HISTORY_RETENTION_TIME) @@ -7123,7 +7665,6 @@ public String toString() { + 
"about finished transformation jobs before they are discarded.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_UDB_HIVE_CLIENTPOOL_MIN = intBuilder(Name.TABLE_UDB_HIVE_CLIENTPOOL_MIN) @@ -7131,7 +7672,6 @@ public String toString() { .setDescription("The minimum capacity of the hive client pool per hive metastore") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_UDB_HIVE_CLIENTPOOL_MAX = intBuilder(Name.TABLE_UDB_HIVE_CLIENTPOOL_MAX) @@ -7139,7 +7679,6 @@ public String toString() { .setDescription("The maximum capacity of the hive client pool per hive metastore") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_LOAD_DEFAULT_REPLICATION = intBuilder(Name.TABLE_LOAD_DEFAULT_REPLICATION) @@ -7147,7 +7686,6 @@ public String toString() { .setDescription("The default replication number of files under the SDS table after " + "load option.") .setScope(Scope.CLIENT) - .setIsHidden(true) .build(); public static final PropertyKey HADOOP_SECURITY_AUTHENTICATION = stringBuilder(Name.HADOOP_SECURITY_AUTHENTICATION) @@ -7170,6 +7708,12 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.SERVER) .build(); + public static final PropertyKey HADOOP_CHECKSUM_COMBINE_MODE = + booleanBuilder(Name.HADOOP_CHECKSUM_COMBINE_MODE) + .setDescription("File Checksum combine mode.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.CLIENT) + .build(); /** * @deprecated This key is used for testing. It is always deprecated. 
*/ @@ -7201,6 +7745,9 @@ public static final class Name { public static final String CONF_VALIDATION_ENABLED = "alluxio.conf.validation.enabled"; public static final String DEBUG = "alluxio.debug"; public static final String EXTENSIONS_DIR = "alluxio.extensions.dir"; + public static final String EXIT_COLLECT_INFO = "alluxio.exit.collect.info"; + public static final String GRPC_REFLECTION_ENABLED = + "alluxio.grpc.reflection.enabled"; public static final String HOME = "alluxio.home"; public static final String INTEGRATION_MASTER_RESOURCE_CPU = "alluxio.integration.master.resource.cpu"; @@ -7247,6 +7794,12 @@ public static final class Name { public static final String WEB_RESOURCES = "alluxio.web.resources"; public static final String WEB_THREADS = "alluxio.web.threads"; public static final String WEB_CORS_ENABLED = "alluxio.web.cors.enabled"; + public static final String WEB_CORS_ALLOW_CREDENTIAL = "alluxio.web.cors.allow.credential"; + public static final String WEB_CORS_ALLOW_HEADERS = "alluxio.web.cors.allow.headers"; + public static final String WEB_CORS_ALLOW_METHODS = "alluxio.web.cors.allow.methods"; + public static final String WEB_CORS_ALLOW_ORIGINS = "alluxio.web.cors.allow.origins"; + public static final String WEB_CORS_EXPOSED_HEADERS = "alluxio.web.cors.exposed.headers"; + public static final String WEB_CORS_MAX_AGE = "alluxio.web.cors.max.age"; public static final String WEB_REFRESH_INTERVAL = "alluxio.web.refresh.interval"; public static final String WEB_THREAD_DUMP_TO_LOG = "alluxio.web.threaddump.log.enabled"; public static final String WEB_UI_ENABLED = "alluxio.web.ui.enabled"; @@ -7315,6 +7868,8 @@ public static final class Name { public static final String UNDERFS_WEB_PARENT_NAMES = "alluxio.underfs.web.parent.names"; public static final String UNDERFS_WEB_TITLES = "alluxio.underfs.web.titles"; public static final String UNDERFS_VERSION = "alluxio.underfs.version"; + public static final String UNDERFS_OBJECT_STORE_STREAMING_UPLOAD_PART_TIMEOUT = + "alluxio.underfs.object.store.streaming.upload.part.timeout"; public static final String UNDERFS_OBJECT_STORE_BREADCRUMBS_ENABLED = "alluxio.underfs.object.store.breadcrumbs.enabled"; public static final String UNDERFS_OBJECT_STORE_SERVICE_THREADS = @@ -7330,6 +7885,25 @@ public static final class Name { "alluxio.underfs.oss.connection.timeout"; public static final String UNDERFS_OSS_CONNECT_TTL = "alluxio.underfs.oss.connection.ttl"; public static final String UNDERFS_OSS_SOCKET_TIMEOUT = "alluxio.underfs.oss.socket.timeout"; + public static final String UNDERFS_OSS_ECS_RAM_ROLE = "alluxio.underfs.oss.ecs.ram.role"; + public static final String UNDERFS_OSS_RETRY_MAX = "alluxio.underfs.oss.retry.max"; + public static final String UNDERFS_OSS_STS_ECS_METADATA_SERVICE_ENDPOINT = + "alluxio.underfs.oss.sts.ecs.metadata.service.endpoint"; + public static final String UNDERFS_OSS_STS_ENABLED = "alluxio.underfs.oss.sts.enabled"; + public static final String UNDERFS_OSS_STS_TOKEN_REFRESH_INTERVAL_MS = + "alluxio.underfs.oss.sts.token.refresh.interval.ms"; + public static final String UNDERFS_OSS_INTERMEDIATE_UPLOAD_CLEAN_AGE = + "alluxio.underfs.oss.intermediate.upload.clean.age"; + public static final String UNDERFS_OSS_STREAMING_UPLOAD_ENABLED = + "alluxio.underfs.oss.streaming.upload.enabled"; + public static final String UNDERFS_OSS_STREAMING_UPLOAD_PARTITION_SIZE = + "alluxio.underfs.oss.streaming.upload.partition.size"; + public static final String UNDERFS_OSS_STREAMING_UPLOAD_THREADS = + 
"alluxio.underfs.oss.streaming.upload.threads"; + public static final String UNDERFS_OSS_DEFAULT_MODE = + "alluxio.underfs.oss.default.mode"; + public static final String UNDERFS_OSS_OWNER_ID_TO_USERNAME_MAPPING = + "alluxio.underfs.oss.owner.id.to.username.mapping"; public static final String UNDERFS_S3_BULK_DELETE_ENABLED = "alluxio.underfs.s3.bulk.delete.enabled"; public static final String UNDERFS_S3_DEFAULT_MODE = "alluxio.underfs.s3.default.mode"; @@ -7401,6 +7975,14 @@ public static final class Name { "alluxio.underfs.cephfs.mount.point"; public static final String UNDERFS_CEPHFS_LOCALIZE_READS = "alluxio.underfs.cephfs.localize.reads"; + public static final String UNDERFS_OBS_INTERMEDIATE_UPLOAD_CLEAN_AGE = + "alluxio.underfs.obs.intermediate.upload.clean.age"; + public static final String UNDERFS_OBS_STREAMING_UPLOAD_ENABLED = + "alluxio.underfs.obs.streaming.upload.enabled"; + public static final String UNDERFS_OBS_STREAMING_UPLOAD_PARTITION_SIZE = + "alluxio.underfs.obs.streaming.upload.partition.size"; + public static final String UNDERFS_OBS_STREAMING_UPLOAD_THREADS = + "alluxio.underfs.obs.streaming.upload.threads"; // // UFS access control related properties @@ -7505,6 +8087,10 @@ public static final class Name { "alluxio.master.cluster.metrics.update.interval"; public static final String MASTER_CONTAINER_ID_RESERVATION_SIZE = "alluxio.master.container.id.reservation.size"; + public static final String MASTER_FAILOVER_COLLECT_INFO = + "alluxio.master.failover.collect.info"; + public static final String MASTER_FILE_ACCESS_TIME_UPDATER_ENABLED = + "alluxio.master.file.access.time.updater.enabled"; public static final String MASTER_FILE_ACCESS_TIME_JOURNAL_FLUSH_INTERVAL = "alluxio.master.file.access.time.journal.flush.interval"; public static final String MASTER_FILE_ACCESS_TIME_UPDATE_PRECISION = @@ -7529,6 +8115,8 @@ public static final class Name { "alluxio.master.lock.pool.high.watermark"; public static final String MASTER_LOCK_POOL_CONCURRENCY_LEVEL = "alluxio.master.lock.pool.concurrency.level"; + public static final String MASTER_LOST_PROXY_DELETION_TIMEOUT_MS = + "alluxio.master.lost.proxy.deletion.timeout"; public static final String MASTER_JOURNAL_CATCHUP_PROTECT_ENABLED = "alluxio.master.journal.catchup.protect.enabled"; public static final String MASTER_JOURNAL_EXIT_ON_DEMOTION = @@ -7567,8 +8155,6 @@ public static final class Name { "alluxio.master.journal.tailer.sleep.time"; private static final String MASTER_JOURNAL_UFS_OPTION = "alluxio.master.journal.ufs.option"; public static final String MASTER_RPC_ADDRESSES = "alluxio.master.rpc.addresses"; - public static final String MASTER_EMBEDDED_JOURNAL_PROXY_HOST = - "alluxio.master.embedded.journal.bind.host"; public static final String MASTER_EMBEDDED_JOURNAL_ADDRESSES = "alluxio.master.embedded.journal.addresses"; public static final String MASTER_EMBEDDED_JOURNAL_MAX_ELECTION_TIMEOUT = @@ -7595,6 +8181,10 @@ public static final class Name { "alluxio.master.embedded.journal.write.timeout"; public static final String MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_CHUNK_SIZE = "alluxio.master.embedded.journal.snapshot.replication.chunk.size"; + public static final String MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_TYPE = + "alluxio.master.embedded.journal.snapshot.replication.compression.type"; + public static final String MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL = + "alluxio.master.embedded.journal.snapshot.replication.compression.level"; public static final String 
MASTER_EMBEDDED_JOURNAL_RAFT_CLIENT_REQUEST_TIMEOUT = "alluxio.master.embedded.journal.raft.client.request.timeout"; public static final String MASTER_EMBEDDED_JOURNAL_RAFT_CLIENT_REQUEST_INTERVAL = @@ -7624,6 +8214,14 @@ public static final class Name { "alluxio.master.metadata.sync.instrument.executor"; public static final String MASTER_METADATA_SYNC_REPORT_FAILURE = "alluxio.master.metadata.sync.report.failure"; + public static final String MASTER_METADATA_SYNC_GET_DIRECTORY_STATUS_SKIP_LOADING_CHILDREN = + "alluxio.master.metadata.sync.get.directory.status.skip.loading.children"; + public static final String MASTER_METADATA_SYNC_UFS_CONCURRENT_LOADS = + "alluxio.master.metadata.sync.ufs.concurrent.loads"; + public static final String MASTER_METADATA_SYNC_UFS_CONCURRENT_GET_STATUS = + "alluxio.master.metadata.sync.ufs.concurrent.get.status"; + public static final String MASTER_METADATA_SYNC_UFS_CONCURRENT_LISTING = + "alluxio.master.metadata.sync.ufs.concurrent.listing"; public static final String MASTER_METADATA_SYNC_UFS_PREFETCH_POOL_SIZE = "alluxio.master.metadata.sync.ufs.prefetch.pool.size"; public static final String MASTER_METADATA_SYNC_TRAVERSAL_ORDER = @@ -7632,6 +8230,8 @@ public static final class Name { "alluxio.master.metadata.sync.ufs.prefetch.status"; public static final String MASTER_METADATA_SYNC_UFS_PREFETCH_TIMEOUT = "alluxio.master.metadata.sync.ufs.prefetch.timeout"; + public static final String MASTER_METADATA_SYNC_UFS_RATE_LIMIT = + "alluxio.master.metadata.sync.ufs.rate.limit"; public static final String MASTER_METADATA_SYNC_IGNORE_TTL = "alluxio.master.metadata.sync.ignore.ttl"; public static final String MASTER_METASTORE = "alluxio.master.metastore"; @@ -7642,10 +8242,14 @@ public static final class Name { "alluxio.master.metastore.dir.inode"; public static final String MASTER_METASTORE_DIR_BLOCK = "alluxio.master.metastore.dir.block"; + public static final String MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_LEVEL = + "alluxio.master.metastore.rocks.checkpoint.compression.level"; + public static final String MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_TYPE = + "alluxio.master.metastore.rocks.checkpoint.compression.type"; + public static final String MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT = + "alluxio.master.metastore.rocks.exclusive.lock.timeout"; public static final String MASTER_METASTORE_ROCKS_PARALLEL_BACKUP = "alluxio.master.metastore.rocks.parallel.backup"; - public static final String MASTER_METASTORE_ROCKS_PARALLEL_BACKUP_COMPRESSION_LEVEL = - "alluxio.master.metastore.rocks.parallel.backup.compression.level"; public static final String MASTER_METASTORE_ROCKS_PARALLEL_BACKUP_THREADS = "alluxio.master.metastore.rocks.parallel.backup.threads"; public static final String MASTER_METASTORE_INODE_CACHE_EVICT_BATCH_SIZE = @@ -7739,6 +8343,9 @@ public static final class Name { public static final String MASTER_PERIODIC_BLOCK_INTEGRITY_CHECK_INTERVAL = "alluxio.master.periodic.block.integrity.check.interval"; public static final String MASTER_PRINCIPAL = "alluxio.master.principal"; + public static final String MASTER_PROXY_TIMEOUT_MS = "alluxio.master.proxy.timeout"; + public static final String MASTER_PROXY_CHECK_HEARTBEAT_INTERVAL = + "alluxio.master.proxy.check.heartbeat.timeout"; public static final String MASTER_REPLICATION_CHECK_INTERVAL_MS = "alluxio.master.replication.check.interval"; public static final String MASTER_RPC_PORT = "alluxio.master.rpc.port"; @@ -7765,6 +8372,8 @@ public static final class Name { 
"alluxio.master.skip.root.acl.check"; public static final String MASTER_STARTUP_BLOCK_INTEGRITY_CHECK_ENABLED = "alluxio.master.startup.block.integrity.check.enabled"; + public static final String MASTER_STATE_LOCK_ERROR_THRESHOLD = + "alluxio.master.state.lock.error.threshold"; public static final String MASTER_TIERED_STORE_GLOBAL_LEVEL0_ALIAS = "alluxio.master.tieredstore.global.level0.alias"; public static final String MASTER_TIERED_STORE_GLOBAL_LEVEL1_ALIAS = @@ -7908,6 +8517,8 @@ public static final class Name { "alluxio.standby.master.metrics.sink.enabled"; public static final String STANDBY_MASTER_WEB_ENABLED = "alluxio.standby.master.web.enabled"; + public static final String STANDBY_MASTER_GRPC_ENABLED = + "alluxio.standby.master.grpc.enabled"; // // Worker related properties @@ -8027,10 +8638,14 @@ public static final class Name { "alluxio.worker.register.stream.response.timeout"; public static final String WORKER_REGISTER_STREAM_COMPLETE_TIMEOUT = "alluxio.worker.register.stream.complete.timeout"; + public static final String WORKER_REGISTER_TO_ALL_MASTERS = + "alluxio.worker.register.to.all.masters"; public static final String WORKER_REMOTE_IO_SLOW_THRESHOLD = "alluxio.worker.remote.io.slow.threshold"; public static final String WORKER_BLOCK_MASTER_CLIENT_POOL_SIZE = "alluxio.worker.block.master.client.pool.size"; + public static final String WORKER_BLOCK_HEARTBEAT_REPORT_SIZE_THRESHOLD = + "alluxio.worker.block.heartbeat.report.size.threshold"; public static final String WORKER_PRINCIPAL = "alluxio.worker.principal"; public static final String WORKER_PAGE_STORE_ASYNC_RESTORE_ENABLED = "alluxio.worker.page.store.async.restore.enabled"; @@ -8110,8 +8725,6 @@ public static final class Name { public static final String WORKER_WEB_BIND_HOST = "alluxio.worker.web.bind.host"; public static final String WORKER_WEB_HOSTNAME = "alluxio.worker.web.hostname"; public static final String WORKER_WEB_PORT = "alluxio.worker.web.port"; - public static final String WORKER_UFS_BLOCK_OPEN_TIMEOUT_MS = - "alluxio.worker.ufs.block.open.timeout"; public static final String WORKER_UFS_INSTREAM_CACHE_EXPIRATION_TIME = "alluxio.worker.ufs.instream.cache.expiration.time"; public static final String WORKER_UFS_INSTREAM_CACHE_ENABLED = @@ -8123,6 +8736,8 @@ public static final class Name { // // Proxy related properties // + public static final String PROXY_MASTER_HEARTBEAT_INTERVAL = + "alluxio.proxy.master.heartbeat.interval"; public static final String PROXY_S3_WRITE_TYPE = "alluxio.proxy.s3.writetype"; public static final String PROXY_S3_DELETE_TYPE = "alluxio.proxy.s3.deletetype"; public static final String PROXY_S3_MULTIPART_UPLOAD_CLEANER_ENABLED = @@ -8156,7 +8771,29 @@ public static final class Name { public static final String PROXY_WEB_PORT = "alluxio.proxy.web.port"; public static final String PROXY_AUDIT_LOGGING_ENABLED = "alluxio.proxy.audit.logging.enabled"; + public static final String PROXY_S3_V2_VERSION_ENABLED = + "alluxio.proxy.s3.v2.version.enabled"; + public static final String PROXY_S3_V2_ASYNC_PROCESSING_ENABLED = + "alluxio.proxy.s3.v2.async.processing.enabled"; + public static final String PROXY_S3_V2_ASYNC_LIGHT_POOL_CORE_THREAD_NUMBER = + "alluxio.proxy.s3.v2.async.light.pool.core.thread.number"; + public static final String PROXY_S3_V2_ASYNC_LIGHT_POOL_MAXIMUM_THREAD_NUMBER = + "alluxio.proxy.s3.v2.async.light.pool.maximum.thread.number"; + public static final String PROXY_S3_V2_ASYNC_LIGHT_POOL_QUEUE_SIZE = + "alluxio.proxy.s3.v2.async.light.pool.queue.size"; + public static 
final String PROXY_S3_V2_ASYNC_HEAVY_POOL_CORE_THREAD_NUMBER = + "alluxio.proxy.s3.v2.async.heavy.pool.core.thread.number"; + public static final String PROXY_S3_V2_ASYNC_HEAVY_POOL_MAXIMUM_THREAD_NUMBER = + "alluxio.proxy.s3.v2.async.heavy.pool.maximum.thread.number"; + public static final String PROXY_S3_V2_ASYNC_HEAVY_POOL_QUEUE_SIZE = + "alluxio.proxy.s3.v2.async.heavy.pool.queue.size"; public static final String S3_UPLOADS_ID_XATTR_KEY = "s3_uploads_mulitpartupload_id"; + public static final String PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS = + "alluxio.proxy.s3.bucketpathcache.timeout"; + public static final String PROXY_S3_GLOBAL_READ_RATE_LIMIT_MB = + "alluxio.proxy.s3.global.read.rate.limit.mb"; + public static final String PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB = + "alluxio.proxy.s3.single.connection.read.rate.limit.mb"; // // Locality related properties @@ -8259,6 +8896,12 @@ public static final class Name { "alluxio.user.client.cache.page.size"; public static final String USER_CLIENT_CACHE_QUOTA_ENABLED = "alluxio.user.client.cache.quota.enabled"; + public static final String USER_CLIENT_CACHE_TTL_ENABLED = + "alluxio.user.client.cache.ttl.enabled"; + public static final String USER_CLIENT_CACHE_TTL_CHECK_INTERVAL_SECONDS = + "alluxio.user.client.cache.ttl.check.interval.seconds"; + public static final String USER_CLIENT_CACHE_TTL_THRESHOLD_SECONDS = + "alluxio.user.client.cache.ttl.threshold.seconds"; public static final String USER_CLIENT_CACHE_SIZE = "alluxio.user.client.cache.size"; public static final String USER_CLIENT_CACHE_STORE_OVERHEAD = @@ -8269,6 +8912,10 @@ public static final class Name { "alluxio.user.client.cache.timeout.duration"; public static final String USER_CLIENT_CACHE_TIMEOUT_THREADS = "alluxio.user.client.cache.timeout.threads"; + public static final String USER_CLIENT_CACHE_IDENTIFIER_INCLUDE_MTIME = + "alluxio.user.client.cache.include.mtime"; + public static final String USER_CLIENT_REPORT_VERSION_ENABLED = + "alluxio.user.client.report.version.enabled"; public static final String USER_CONF_CLUSTER_DEFAULT_ENABLED = "alluxio.user.conf.cluster.default.enabled"; public static final String USER_CONF_SYNC_INTERVAL = "alluxio.user.conf.sync.interval"; @@ -8279,6 +8926,8 @@ public static final class Name { "alluxio.user.file.copyfromlocal.block.location.policy.class"; public static final String USER_FILE_DELETE_UNCHECKED = "alluxio.user.file.delete.unchecked"; + public static final String USER_FILE_DIRECT_ACCESS = + "alluxio.user.file.direct.access"; public static final String USER_FILE_MASTER_CLIENT_POOL_SIZE_MIN = "alluxio.user.file.master.client.pool.size.min"; public static final String USER_FILE_MASTER_CLIENT_POOL_SIZE_MAX = @@ -8323,12 +8972,16 @@ public static final class Name { public static final String USER_FILE_WRITE_INIT_MAX_DURATION = "alluxio.user.file.write.init.max.duration"; public static final String USER_HOSTNAME = "alluxio.user.hostname"; + public static final String USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS = + "alluxio.user.hdfs.client.exclude.mount.info.on.list.status"; public static final String USER_LOCAL_READER_CHUNK_SIZE_BYTES = "alluxio.user.local.reader.chunk.size.bytes"; public static final String USER_LOCAL_WRITER_CHUNK_SIZE_BYTES = "alluxio.user.local.writer.chunk.size.bytes"; public static final String USER_LOGGING_THRESHOLD = "alluxio.user.logging.threshold"; public static final String USER_MASTER_POLLING_TIMEOUT = "alluxio.user.master.polling.timeout"; + public static final String USER_MASTER_POLLING_CONCURRENT = 
+ "alluxio.user.master.polling.concurrent"; public static final String USER_METADATA_CACHE_ENABLED = "alluxio.user.metadata.cache.enabled"; public static final String USER_METADATA_CACHE_MAX_SIZE = @@ -8522,6 +9175,10 @@ public static final class Name { "alluxio.network.tls.ssl.context.provider.classname"; public static final String NETWORK_TLS_ENABLED = "alluxio.network.tls.enabled"; + // new job service + public static final String JOB_BATCH_SIZE = "alluxio.job.batch.size"; + public static final String JOB_RETENTION_TIME = "alluxio.job.retention.time"; + // // Job service // @@ -8532,6 +9189,10 @@ public static final class Name { public static final String JOB_MASTER_FINISHED_JOB_RETENTION_TIME = "alluxio.job.master.finished.job.retention.time"; public static final String JOB_MASTER_JOB_CAPACITY = "alluxio.job.master.job.capacity"; + public static final String JOB_MASTER_MASTER_HEARTBEAT_INTERVAL = + "alluxio.job.master.master.heartbeat.interval"; + public static final String JOB_MASTER_MASTER_TIMEOUT = + "alluxio.job.master.master.timeout"; public static final String JOB_MASTER_WORKER_HEARTBEAT_INTERVAL = "alluxio.job.master.worker.heartbeat.interval"; public static final String JOB_MASTER_WORKER_TIMEOUT = @@ -8539,6 +9200,8 @@ public static final class Name { public static final String JOB_MASTER_BIND_HOST = "alluxio.job.master.bind.host"; public static final String JOB_MASTER_HOSTNAME = "alluxio.job.master.hostname"; + public static final String JOB_MASTER_LOST_MASTER_INTERVAL = + "alluxio.job.master.lost.master.interval"; public static final String JOB_MASTER_LOST_WORKER_INTERVAL = "alluxio.job.master.lost.worker.interval"; public static final String JOB_MASTER_RPC_PORT = "alluxio.job.master.rpc.port"; @@ -8617,6 +9280,8 @@ public static final class Name { public static final String HADOOP_KERBEROS_KEYTAB_LOGIN_AUTORENEWAL = "alluxio.hadoop.kerberos.keytab.login.autorenewal"; + public static final String HADOOP_CHECKSUM_COMBINE_MODE = + "alluxio.hadoop.checksum.combine.mode"; private Name() {} // prevent instantiation } @@ -8688,7 +9353,7 @@ public enum Template { PropertyType.STRING), UNDERFS_ABFS_ACCOUNT_KEY( "fs.azure.account.key.%s.dfs.core.windows.net", - "fs\\.azure\\.account\\.key\\.(\\w+)\\.dfs\\.core\\.window\\.net", + "fs\\.azure\\.account\\.key\\.(\\w+)\\.dfs\\.core\\.windows\\.net", PropertyCreators.fromBuilder(stringBuilder("fs.azure.account.key.%s.dfs.core.windows.net") .setDisplayType(DisplayType.CREDENTIALS))), UNDERFS_AZURE_ACCOUNT_KEY( diff --git a/core/common/src/main/java/alluxio/conf/Reconfigurable.java b/core/common/src/main/java/alluxio/conf/Reconfigurable.java index 19b7dae27309..cbb4f76824d8 100644 --- a/core/common/src/main/java/alluxio/conf/Reconfigurable.java +++ b/core/common/src/main/java/alluxio/conf/Reconfigurable.java @@ -11,6 +11,8 @@ package alluxio.conf; +import java.util.Map; + /** * Reconfigurable listener. */ @@ -18,6 +20,12 @@ public interface Reconfigurable { /** * When the property changed, this function will be invoked. + * @param changedProperties the changed properties + */ + void update(Map changedProperties); + + /** + * When any property changed, this function will be invoked. 
*/ void update(); } diff --git a/core/common/src/main/java/alluxio/conf/ReconfigurableRegistry.java b/core/common/src/main/java/alluxio/conf/ReconfigurableRegistry.java index 92cf6e937420..e4e9492f82c2 100644 --- a/core/common/src/main/java/alluxio/conf/ReconfigurableRegistry.java +++ b/core/common/src/main/java/alluxio/conf/ReconfigurableRegistry.java @@ -11,13 +11,20 @@ package alluxio.conf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.util.LinkedList; import java.util.List; +import java.util.Map; +import javax.annotation.concurrent.ThreadSafe; /** * Registry of all reconfigurable listeners. */ +@ThreadSafe public class ReconfigurableRegistry { + private static final Logger LOG = LoggerFactory.getLogger(ReconfigurableRegistry.class); private static final List<Reconfigurable> LISTENER_LIST = new LinkedList<>(); /** @@ -46,11 +53,32 @@ public static synchronized boolean unregister(Reconfigurable listener) { */ public static synchronized boolean update() { for (Reconfigurable listener : LISTENER_LIST) { - listener.update(); + try { + listener.update(); + } catch (Throwable t) { + LOG.error("Error while updating changed properties for {}", listener, t); + } } return true; } // prevent instantiation private ReconfigurableRegistry() {} + + /** + * When a property is reconfigured, this function will be invoked. + * The registered property listeners will be notified. + * + * @param changedProperties the changed properties + */ + public static synchronized void update(Map<PropertyKey, Object> changedProperties) { + for (Reconfigurable listener : LISTENER_LIST) { + try { + listener.update(changedProperties); + } catch (Throwable t) { + LOG.error("Error while updating changed properties {} for {}", + changedProperties, listener, t); + } + } + } } diff --git a/core/common/src/main/java/alluxio/conf/path/TrieNode.java b/core/common/src/main/java/alluxio/conf/path/TrieNode.java index e741478a9859..c2cc0f67179f 100644 --- a/core/common/src/main/java/alluxio/conf/path/TrieNode.java +++ b/core/common/src/main/java/alluxio/conf/path/TrieNode.java @@ -71,6 +71,27 @@ public TrieNode<V> insert(String path) { return current; } + /** + * Get the terminal node closest to the full path. + * @param path the path to check + * @return the terminal node + */ + public Optional<TrieNode<V>> getClosestTerminal(String path) { + TrieNode<V> current = this; + TrieNode<V> result = current.isTerminal() ? current : null; + for (String nxt : path.split("/")) { + if (current.mChildren.containsKey(nxt)) { + current = current.mChildren.get(nxt); + if (current.mIsTerminal) { + result = current; + } + } else { + break; + } + } + return Optional.ofNullable(result); + } + /** * Traverses the trie along the path components until the traversal cannot proceed any more. *
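The new getClosestTerminal walks down the trie one path component at a time and remembers the deepest terminal node it passes, so callers get the longest registered prefix of a path. A rough usage sketch (hypothetical, assuming a directly instantiable TrieNode<V> whose insert() marks terminal nodes, as the hunk above suggests):

import alluxio.conf.path.TrieNode;
import java.util.Optional;

public class TrieNodeSketch {
  public static void main(String[] args) {
    TrieNode<Integer> root = new TrieNode<>();
    root.insert("/a");      // marks "/a" as a terminal node
    root.insert("/a/b/c");  // marks "/a/b/c" as a terminal node

    // The deepest terminal passed on the way to "/a/b/c/d" is "/a/b/c".
    Optional<TrieNode<Integer>> deep = root.getClosestTerminal("/a/b/c/d");
    // Only "/a" is terminal along "/a/b", so "/a" is the closest terminal.
    Optional<TrieNode<Integer>> shallow = root.getClosestTerminal("/a/b");
    // No terminal exists along "/x", so the result is empty.
    Optional<TrieNode<Integer>> miss = root.getClosestTerminal("/x");
    System.out.println(deep.isPresent() + " " + shallow.isPresent() + " " + miss.isPresent());
  }
}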
It was " + "likely removed across listing calls."), @@ -95,6 +96,9 @@ public enum ExceptionMessage { ROOT_CANNOT_BE_RENAMED("The root directory cannot be renamed"), JOURNAL_ENTRY_MISSING( "Journal entries are missing between sequence number {0} (inclusive) and {1} (exclusive)."), + CANNOT_OVERWRITE_DIRECTORY("{0} already exists. Directories cannot be overwritten with create"), + CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE("{0} already exists. If you want to overwrite the file," + + " you need to specify the overwrite option."), // block master NO_WORKER_FOUND("No worker with workerId {0,number,#} is found"), @@ -114,6 +118,11 @@ public enum ExceptionMessage { // file system master ufs FAILED_UFS_RENAME("Failed to rename {0} to {1} in the under file system"), + // worker + WORKER_NOT_FOUND("Worker {0} not found"), + WORKER_DECOMMISSIONED_BEFORE_REGISTER("Attempting to decommission an unregistered worker {0}. " + + "Please wait until this worker is registered."), + // cli INVALID_ARGS_NULL("Null args for command {0}"), INVALID_ARGS_NUM("Command {0} takes {1} arguments, not {2}"), @@ -188,6 +197,16 @@ public enum ExceptionMessage { // ufs maintenance UFS_OP_NOT_ALLOWED("Operation {0} not allowed on ufs path {1} under maintenance mode {2}"), + // RocksDB + ROCKS_DB_CLOSING("RocksDB is being closed because the master is under one of the following " + + "events: primary failover/shut down/checkpoint/journal replay"), + ROCKS_DB_REWRITTEN("RocksDB has been rewritten. Typically this is because the master is " + + "restored to a checkpoint."), + ROCKS_DB_EXCLUSIVE_LOCK_FORCED("RocksDB exclusive lock is forced with {0} ongoing " + + "r/w operations. There is a risk to crash!"), + ROCKS_DB_REF_COUNT_DIRTY("Some read/write operations did not respect the exclusive lock on " + + "the RocksStore and messed up the ref count! Current ref count is {0}."), + // SEMICOLON! minimize merge conflicts by putting it on its own line ; diff --git a/core/common/src/main/java/alluxio/exception/runtime/BlockDoesNotExistRuntimeException.java b/core/common/src/main/java/alluxio/exception/runtime/BlockDoesNotExistRuntimeException.java index 5eef0d297afa..ce7aef647412 100644 --- a/core/common/src/main/java/alluxio/exception/runtime/BlockDoesNotExistRuntimeException.java +++ b/core/common/src/main/java/alluxio/exception/runtime/BlockDoesNotExistRuntimeException.java @@ -19,11 +19,21 @@ public class BlockDoesNotExistRuntimeException extends NotFoundRuntimeException { /** - * Constructs a new exception with the specified detail message and cause. + * Constructs a new exception with the specified block ID. * * @param blockId block id */ public BlockDoesNotExistRuntimeException(long blockId) { super(MessageFormat.format("BlockMeta not found for blockId {0,number,#}", blockId)); } + + /** + * Constructs a new exception with the specified block ID and cause. + * + * @param blockId block id + * @param cause why the block is not found + */ + public BlockDoesNotExistRuntimeException(long blockId, Throwable cause) { + super(MessageFormat.format("Block {0,number,#} not found", blockId), cause); + } } diff --git a/core/common/src/main/java/alluxio/file/options/DirectoryLoadType.java b/core/common/src/main/java/alluxio/file/options/DirectoryLoadType.java new file mode 100644 index 000000000000..69e3e77eba22 --- /dev/null +++ b/core/common/src/main/java/alluxio/file/options/DirectoryLoadType.java @@ -0,0 +1,47 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.file.options; + +/** + * Defines how directories will be listed on the UFS when performing + * metadata synchronization. Only affects recursive listings. + */ +public enum DirectoryLoadType { + /** + * Load the path recursively by running a single command which returns results + * in batches if supported by the UFS. For example on an object store, this + * will perform a ListBucket operation with no delimiter. This will create + * less load on the UFS than {@link DirectoryLoadType#BFS} and {@link DirectoryLoadType#DFS} + * but will be more impacted by latency between Alluxio and the UFS as there + * is only a single listing running. + * This should only be used with S3 UFS types, as currently only this UFS + * type uses batch listing; with other UFS types all items will be loaded into memory + * before processing. + */ + SINGLE_LISTING, + /** + * Load the path recursively by loading each nested directory in a separate + * load command in a breadth first manner. Each directory will be listed in batches + * if supported by the UFS. Listings of different directories will run concurrently. + * Note that this is only an approximate BFS, as batches are processed and loaded + * concurrently and may be loaded in different orders. + */ + BFS, + /** + * Load the path recursively by loading each nested directory in a separate + * load command in a depth first manner. Each directory will be listed in batches + * if supported by the UFS. Listings of different directories will run concurrently. + * Note that this is only an approximate DFS, as batches are processed and loaded + * concurrently and may be loaded in different orders. + */ + DFS +} diff --git a/core/common/src/main/java/alluxio/grpc/ClientVersionClientInjector.java b/core/common/src/main/java/alluxio/grpc/ClientVersionClientInjector.java new file mode 100644 index 000000000000..cb059bc84551 --- /dev/null +++ b/core/common/src/main/java/alluxio/grpc/ClientVersionClientInjector.java @@ -0,0 +1,51 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.grpc; + +import alluxio.RuntimeConstants; + +import io.grpc.CallOptions; +import io.grpc.Channel; +import io.grpc.ClientCall; +import io.grpc.ClientInterceptor; +import io.grpc.ForwardingClientCall; +import io.grpc.ForwardingClientCallListener; +import io.grpc.Metadata; +import io.grpc.MethodDescriptor; + +/** + * Client side interceptor that is used to set the request header for the client version.
+ */ +public class ClientVersionClientInjector implements ClientInterceptor { + public static final Metadata.Key<String> S_CLIENT_VERSION_KEY = + Metadata.Key.of("alluxio-version", Metadata.ASCII_STRING_MARSHALLER); + + @Override + public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(MethodDescriptor<ReqT, RespT> method, + CallOptions callOptions, Channel next) { + return new ForwardingClientCall.SimpleForwardingClientCall<ReqT, RespT>( + next.newCall(method, callOptions)) { + @Override + public void start(Listener<RespT> responseListener, Metadata headers) { + // Put version to headers. + headers.put(S_CLIENT_VERSION_KEY, RuntimeConstants.VERSION_AND_REVISION_SHORT); + super.start(new ForwardingClientCallListener.SimpleForwardingClientCallListener<RespT>( + responseListener) { + @Override + public void onHeaders(Metadata headers) { + super.onHeaders(headers); + } + }, headers); + } + }; + } +} diff --git a/core/common/src/main/java/alluxio/grpc/DataMessageMarshaller.java b/core/common/src/main/java/alluxio/grpc/DataMessageMarshaller.java index 828226cee616..446faa0a1d42 100644 --- a/core/common/src/main/java/alluxio/grpc/DataMessageMarshaller.java +++ b/core/common/src/main/java/alluxio/grpc/DataMessageMarshaller.java @@ -27,6 +27,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.Map; +import javax.annotation.Nullable; /** * Marshaller for data messages. @@ -104,7 +105,7 @@ public DataBuffer pollBuffer(T message) { * @param message the message to be combined * @return the message with the combined buffer */ - public abstract T combineData(DataMessage<T, DataBuffer> message); + public abstract T combineData(@Nullable DataMessage<T, DataBuffer> message); /** * Serialize the message to buffers. diff --git a/core/common/src/main/java/alluxio/grpc/GrpcChannelBuilder.java b/core/common/src/main/java/alluxio/grpc/GrpcChannelBuilder.java index 709f38188ad2..c63b0fd93c94 100644 --- a/core/common/src/main/java/alluxio/grpc/GrpcChannelBuilder.java +++ b/core/common/src/main/java/alluxio/grpc/GrpcChannelBuilder.java @@ -104,6 +104,9 @@ public GrpcChannel build() throws AlluxioStatusException { } throw AlluxioStatusException.fromThrowable(t); } + if (mConfiguration.getBoolean(PropertyKey.USER_CLIENT_REPORT_VERSION_ENABLED)) { + channel.intercept(new ClientVersionClientInjector()); + } return channel; } } diff --git a/core/common/src/main/java/alluxio/grpc/GrpcSerializationUtils.java b/core/common/src/main/java/alluxio/grpc/GrpcSerializationUtils.java index afa335154d05..ceb7c08084a9 100644 --- a/core/common/src/main/java/alluxio/grpc/GrpcSerializationUtils.java +++ b/core/common/src/main/java/alluxio/grpc/GrpcSerializationUtils.java @@ -47,7 +47,7 @@ public class GrpcSerializationUtils { private static final String BUFFER_INPUT_STREAM_CLASS_NAME = "io.grpc.internal.ReadableBuffers$BufferInputStream"; private static final String BUFFER_FIELD_NAME = "buffer"; - private static final String BUFFERS_FIELD_NAME = "buffers"; + private static final String READABLE_BUFFERS_FIELD_NAME = "readableBuffers"; private static final String NETTY_WRITABLE_BUFFER_CLASS_NAME = "io.grpc.netty.NettyWritableBuffer"; private static final String NETTY_READABLE_BUFFER_CLASS_NAME = @@ -79,7 +79,7 @@ public class GrpcSerializationUtils { sBufferList = getPrivateField(BUFFER_CHAIN_OUTPUT_STREAM_CLASS_NAME, BUFFER_LIST_FIELD_NAME); sCurrent = getPrivateField(BUFFER_CHAIN_OUTPUT_STREAM_CLASS_NAME, CURRENT_FIELD_NAME); sCompositeBuffers = - getPrivateField(CompositeReadableBuffer.class.getName(), BUFFERS_FIELD_NAME); + getPrivateField(CompositeReadableBuffer.class.getName(),
READABLE_BUFFERS_FIELD_NAME); sReadableByteBuf = getPrivateField(NETTY_READABLE_BUFFER_CLASS_NAME, BUFFER_FIELD_NAME); } catch (Exception e) { LOG.warn("Cannot get gRPC output stream buffer, zero copy receive will be disabled.", e); @@ -95,7 +95,7 @@ private static Field getPrivateField(String className, String fieldName) return field; } - private static Constructor<?> getPrivateConstructor(String className, Class<?> ...parameterTypes) + private static Constructor<?> getPrivateConstructor(String className, Class<?> ... parameterTypes) throws ClassNotFoundException, NoSuchMethodException { Class<?> declaringClass = Class.forName(className); Constructor<?> constructor = declaringClass.getDeclaredConstructor(parameterTypes); @@ -146,6 +146,10 @@ public static ByteBuf getByteBufFromReadableBuffer(ReadableBuffer buffer) { } try { if (buffer instanceof CompositeReadableBuffer) { + // TODO(elega) grpc introduced native protobuf zero copy since 1.39.0 + // https://github.com/grpc/grpc-java/pull/8102/files + // replace the following with + // return Unpooled.wrappedBuffer(buffer.getByteBuffer()); Queue<ReadableBuffer> buffers = (Queue<ReadableBuffer>) sCompositeBuffers.get(buffer); if (buffers.size() == 1) { return getByteBufFromReadableBuffer(buffers.peek()); diff --git a/core/common/src/main/java/alluxio/grpc/GrpcServerBuilder.java b/core/common/src/main/java/alluxio/grpc/GrpcServerBuilder.java index 9eca6fffc4ee..79cab8631c22 100644 --- a/core/common/src/main/java/alluxio/grpc/GrpcServerBuilder.java +++ b/core/common/src/main/java/alluxio/grpc/GrpcServerBuilder.java @@ -24,6 +24,7 @@ import io.grpc.ServerInterceptors; import io.grpc.ServerServiceDefinition; import io.grpc.netty.NettyServerBuilder; +import io.grpc.protobuf.services.ProtoReflectionService; import io.netty.channel.ChannelOption; import io.netty.channel.EventLoopGroup; import io.netty.channel.ServerChannel; @@ -33,6 +34,7 @@ import java.util.Set; import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; import javax.annotation.Nullable; /** @@ -51,10 +53,13 @@ public final class GrpcServerBuilder { /** Alluxio configuration. */ private final AlluxioConfiguration mConfiguration; + private final boolean mGrpcReflectionEnabled; + private GrpcServerBuilder(GrpcServerAddress serverAddress, AuthenticationServer authenticationServer, AlluxioConfiguration conf) { mNettyServerBuilder = NettyServerBuilder.forAddress(serverAddress.getSocketAddress()); mConfiguration = conf; + mGrpcReflectionEnabled = conf.getBoolean(PropertyKey.GRPC_REFLECTION_ENABLED); if (conf.getBoolean(alluxio.conf.PropertyKey.NETWORK_TLS_ENABLED)) { sslContext(SslContextProvider.Factory.create(mConfiguration).getServerSSLContext()); @@ -258,8 +263,24 @@ public GrpcServerBuilder sslContext(SslContext sslContext) { * @return the built {@link GrpcServer} */ public GrpcServer build() { - addService(new GrpcService(new ServiceVersionClientServiceHandler(mServices)) + return build(null); + } + + /** + * Builds the server. + * It attaches required services and interceptors for authentication.
+ * + * @param nodeStateSupplier a supplier to provide the node state (PRIMARY/STANDBY) + * @return the built {@link GrpcServer} + */ + public GrpcServer build(@Nullable Supplier<NodeState> nodeStateSupplier) { + addService(new GrpcService(new ServiceVersionClientServiceHandler(mServices, nodeStateSupplier)) .disableAuthentication()); + if (mGrpcReflectionEnabled) { + // authentication needs to be disabled so that the grpc command line tools can call + // this reflection endpoint and get the current grpc services and their interfaces. + addService(new GrpcService(ProtoReflectionService.newInstance()).disableAuthentication()); + } return new GrpcServer(mNettyServerBuilder.build(), mAuthenticationServer, mCloser, mConfiguration.getMs(PropertyKey.NETWORK_CONNECTION_SERVER_SHUTDOWN_TIMEOUT)); }
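For context, the reflection service registered above is the stock grpc-java one; once alluxio.grpc.reflection.enabled is on, generic tooling such as grpcurl can list and describe the server's services. A minimal standalone sketch of the same wiring in plain grpc-java (the port is an arbitrary illustrative choice, not an Alluxio default):

import io.grpc.Server;
import io.grpc.ServerBuilder;
import io.grpc.protobuf.services.ProtoReflectionService;

public class ReflectionServerSketch {
  public static void main(String[] args) throws Exception {
    // Register the standard reflection service so clients can discover
    // the services and message types exposed by this server.
    Server server = ServerBuilder.forPort(50051)
        .addService(ProtoReflectionService.newInstance())
        .build()
        .start();
    server.awaitTermination();
  }
}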
diff --git a/core/common/src/main/java/alluxio/grpc/GrpcUtils.java b/core/common/src/main/java/alluxio/grpc/GrpcUtils.java index 10a51f2a0091..8a8ffbdf0c3f 100644 --- a/core/common/src/main/java/alluxio/grpc/GrpcUtils.java +++ b/core/common/src/main/java/alluxio/grpc/GrpcUtils.java @@ -15,6 +15,7 @@ import alluxio.Constants; import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; import alluxio.proto.journal.File; import alluxio.security.authorization.AccessControlList; import alluxio.security.authorization.AclAction; @@ -227,6 +228,25 @@ public static DescendantType fromProto(alluxio.grpc.LoadDescendantPType pDescend) { } } + /** + * Converts a proto type to a wire type. + * + * @param pDirectoryLoadType the proto representation of a directory load type + * @return the wire representation of the directory load type + */ + public static DirectoryLoadType fromProto(alluxio.grpc.DirectoryLoadPType pDirectoryLoadType) { + switch (pDirectoryLoadType) { + case SINGLE_LISTING: + return DirectoryLoadType.SINGLE_LISTING; + case BFS: + return DirectoryLoadType.BFS; + case DFS: + return DirectoryLoadType.DFS; + default: + throw new IllegalStateException("Unknown DirectoryLoadType: " + pDirectoryLoadType); + } + } + /** * Converts a proto type to a wire type. * @@ -329,7 +349,8 @@ public static WorkerInfo fromProto(alluxio.grpc.WorkerInfo workerInfo) { .setUsedBytes(workerInfo.getUsedBytes()) .setUsedBytesOnTiers(workerInfo.getUsedBytesOnTiersMap()) .setVersion(workerInfo.getBuildVersion().getVersion()) - .setRevision(workerInfo.getBuildVersion().getRevision()); + .setRevision(workerInfo.getBuildVersion().getRevision()) + .setNumVCpu(workerInfo.getNumVCpu()); } /** @@ -609,7 +630,8 @@ public static alluxio.grpc.WorkerInfo toProto(WorkerInfo workerInfo) { .putAllCapacityBytesOnTiers(workerInfo.getCapacityBytesOnTiers()) .putAllUsedBytesOnTiers(workerInfo.getUsedBytesOnTiers()) .setBuildVersion(BuildVersion.newBuilder().setVersion(workerInfo.getVersion()) - .setRevision(workerInfo.getRevision())) + .setRevision(workerInfo.getRevision())) + .setNumVCpu(workerInfo.getNumVCpu()) .build(); } diff --git a/core/common/src/main/java/alluxio/grpc/ReadResponseMarshaller.java b/core/common/src/main/java/alluxio/grpc/ReadResponseMarshaller.java index d1ad40596236..cb32eebb812b 100644 --- a/core/common/src/main/java/alluxio/grpc/ReadResponseMarshaller.java +++ b/core/common/src/main/java/alluxio/grpc/ReadResponseMarshaller.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.io.InputStream; +import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; /** @@ -83,7 +84,7 @@ protected ReadResponse deserialize(ReadableBuffer buffer) throws IOException { } @Override - public ReadResponse combineData(DataMessage<ReadResponse, DataBuffer> message) { + public ReadResponse combineData(@Nullable DataMessage<ReadResponse, DataBuffer> message) { if (message == null) { return null; } diff --git a/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java b/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java index e02bb4d4cca1..18fe8b6c1600 100644 --- a/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java +++ b/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java @@ -13,6 +13,8 @@ import alluxio.Constants; import alluxio.annotation.SuppressFBWarnings; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; import com.google.common.collect.ImmutableSet; import io.grpc.Status; @@ -20,6 +22,8 @@ import java.util.Objects; import java.util.Set; +import java.util.function.Supplier; +import javax.annotation.Nullable; /** * This class is a gRPC handler that serves Alluxio service versions. @@ -28,19 +32,33 @@ public final class ServiceVersionClientServiceHandler extends ServiceVersionClientServiceGrpc.ServiceVersionClientServiceImplBase { /** Set of services that are going to be recognized by this versioning service. */ private final Set<ServiceType> mServices; + @Nullable private final Supplier<NodeState> mNodeStateSupplier; + private final boolean mStandbyRpcEnabled = + Configuration.getBoolean(PropertyKey.STANDBY_MASTER_GRPC_ENABLED); /** * Creates service version handler that allows given services.
* @param services services to allow + * @param nodeStateSupplier the supplier to get the node state */ - public ServiceVersionClientServiceHandler(Set<ServiceType> services) { + public ServiceVersionClientServiceHandler( + Set<ServiceType> services, @Nullable Supplier<NodeState> nodeStateSupplier) { mServices = ImmutableSet.copyOf(Objects.requireNonNull(services, "services is null")); + mNodeStateSupplier = nodeStateSupplier; } @Override @SuppressFBWarnings(value = "DB_DUPLICATE_SWITCH_CLAUSES") public void getServiceVersion(GetServiceVersionPRequest request, StreamObserver<GetServiceVersionPResponse> responseObserver) { + // getAllowedOnStandbyMasters() defaults to false + if (!request.getAllowedOnStandbyMasters() && mStandbyRpcEnabled + && mNodeStateSupplier != null && mNodeStateSupplier.get() == NodeState.STANDBY) { + responseObserver.onError(Status.UNAVAILABLE + .withDescription("GetServiceVersion is not supported on standby master") + .asException()); + return; + } ServiceType serviceType = request.getServiceType(); if (serviceType != ServiceType.UNKNOWN_SERVICE && !mServices.contains(serviceType)) { @@ -79,6 +97,9 @@ public void getServiceVersion(GetServiceVersionPRequest request, case META_MASTER_MASTER_SERVICE: serviceVersion = Constants.META_MASTER_MASTER_SERVICE_VERSION; break; + case META_MASTER_PROXY_SERVICE: + serviceVersion = Constants.META_MASTER_PROXY_SERVICE_VERSION; + break; case METRICS_MASTER_CLIENT_SERVICE: serviceVersion = Constants.METRICS_MASTER_CLIENT_SERVICE_VERSION; break; @@ -88,6 +109,9 @@ public void getServiceVersion(GetServiceVersionPRequest request, case JOB_MASTER_WORKER_SERVICE: serviceVersion = Constants.JOB_MASTER_WORKER_SERVICE_VERSION; break; + case JOB_MASTER_MASTER_SERVICE: + serviceVersion = Constants.JOB_MASTER_MASTER_SERVICE_VERSION; + break; case JOURNAL_MASTER_CLIENT_SERVICE: serviceVersion = Constants.JOURNAL_MASTER_CLIENT_SERVICE_VERSION; break; diff --git a/core/common/src/main/java/alluxio/grpc/WriteRequestMarshaller.java b/core/common/src/main/java/alluxio/grpc/WriteRequestMarshaller.java index b25c0bce68f8..bcb1cb4192db 100644 --- a/core/common/src/main/java/alluxio/grpc/WriteRequestMarshaller.java +++ b/core/common/src/main/java/alluxio/grpc/WriteRequestMarshaller.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.io.InputStream; +import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; /** @@ -99,7 +100,7 @@ protected WriteRequest deserialize(ReadableBuffer buffer) throws IOException { } @Override - public WriteRequest combineData(DataMessage<WriteRequest, DataBuffer> message) { + public WriteRequest combineData(@Nullable DataMessage<WriteRequest, DataBuffer> message) { if (message == null) { return null; } diff --git a/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java b/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java new file mode 100644 index 000000000000..64919dacbf5d --- /dev/null +++ b/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java @@ -0,0 +1,80 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */ + +package alluxio.heartbeat; + +import com.google.common.base.Preconditions; +import org.apache.logging.log4j.core.util.CronExpression; + +import java.time.Duration; +import java.time.Instant; +import java.util.Date; +import java.util.Objects; + +/** + * Calculates the next interval from the given cron expression. + */ +public class CronExpressionIntervalSupplier implements SleepIntervalSupplier { + private final long mInterval; + private final CronExpression mCron; + + /** + * Constructs a new {@link CronExpressionIntervalSupplier}. + * + * @param cronExpression the cron expression + * @param fixedInterval the fixed interval + */ + public CronExpressionIntervalSupplier(CronExpression cronExpression, long fixedInterval) { + Preconditions.checkNotNull(cronExpression, "CronExpression is null"); + mInterval = fixedInterval; + mCron = cronExpression; + } + + @Override + public long getNextInterval(long previousTickedMs, long nowTimeStampMillis) { + long nextInterval = 0; + long executionTimeMs = nowTimeStampMillis - previousTickedMs; + if (executionTimeMs < mInterval) { + nextInterval = mInterval - executionTimeMs; + } + Date now = Date.from(Instant.ofEpochMilli(nowTimeStampMillis + nextInterval)); + if (mCron.isSatisfiedBy(now)) { + return nextInterval; + } + return nextInterval + Duration.between( + now.toInstant(), mCron.getNextValidTimeAfter(now).toInstant()).toMillis(); + } + + @Override + public long getRunLimit(long previousTickedMs) { + Date now = Date.from(Instant.ofEpochMilli(previousTickedMs)); + return Duration.between(now.toInstant(), + mCron.getNextInvalidTimeAfter(now).toInstant()).toMillis(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + CronExpressionIntervalSupplier that = (CronExpressionIntervalSupplier) o; + return mInterval == that.mInterval + && Objects.equals(mCron.getCronExpression(), that.mCron.getCronExpression()); + } + + @Override + public int hashCode() { + return Objects.hash(mInterval, mCron.getCronExpression()); + } +}
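Putting the two pieces of getNextInterval together: the supplier first applies the fixed-interval backoff, then pushes the wake-up time forward to the next instant the cron expression matches. A hedged usage sketch (the cron string and the 60-second interval are illustrative values, not Alluxio defaults):

import org.apache.logging.log4j.core.util.CronExpression;

public class CronSupplierSketch {
  public static void main(String[] args) throws Exception {
    // Fields: seconds, minutes, hours, day-of-month, month, day-of-week.
    // This expression matches any second inside the 02:00-02:59 window.
    CronExpression window = new CronExpression("* * 2 * * ?");
    long fixedIntervalMs = 60_000L;
    CronExpressionIntervalSupplier supplier =
        new CronExpressionIntervalSupplier(window, fixedIntervalMs);

    long now = System.currentTimeMillis();
    // The last tick was 10s ago, so at least 50s of the fixed interval remain;
    // if now+50s falls outside the 2am window, the wait until the next match
    // of the cron expression is added on top.
    long sleepMs = supplier.getNextInterval(now - 10_000L, now);
    System.out.println("sleep for " + sleepMs + " ms");
  }
}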
diff --git a/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java b/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java new file mode 100644 index 000000000000..da816ef8580b --- /dev/null +++ b/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java @@ -0,0 +1,82 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.heartbeat; + +import org.slf4j.Logger; +import org.slf4j.helpers.NOPLogger; + +import java.util.Objects; + +/** + * Fixed interval supplier. + */ +public class FixedIntervalSupplier implements SleepIntervalSupplier { + + private final long mInterval; + protected final Logger mLogger; + + /** + * Constructs a new {@link FixedIntervalSupplier}. + * + * @param fixedInterval the fixed interval + * @param logger the logger + */ + public FixedIntervalSupplier(long fixedInterval, Logger logger) { + mInterval = fixedInterval; + mLogger = logger; + } + + /** + * Constructs a new {@link FixedIntervalSupplier}. + * + * @param fixedInterval the fixed interval + */ + public FixedIntervalSupplier(long fixedInterval) { + this(fixedInterval, NOPLogger.NOP_LOGGER); + } + + @Override + public long getNextInterval(long previousTickedMs, long nowTimeStampMillis) { + if (previousTickedMs == -1) { + return -1; + } + long executionTimeMs = nowTimeStampMillis - previousTickedMs; + if (executionTimeMs > mInterval) { + mLogger.warn("{} last execution took {} ms. Longer than the interval {}", + Thread.currentThread().getName(), executionTimeMs, mInterval); + return 0; + } + return mInterval - executionTimeMs; + } + + @Override + public long getRunLimit(long previousTickedMs) { + return mInterval; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + FixedIntervalSupplier that = (FixedIntervalSupplier) o; + return mInterval == that.mInterval; + } + + @Override + public int hashCode() { + return Objects.hash(mInterval); + } +} diff --git a/core/common/src/main/java/alluxio/heartbeat/HeartbeatContext.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatContext.java index c5378bf0f141..b77444e354e6 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatContext.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatContext.java @@ -32,6 +32,8 @@ public final class HeartbeatContext { // Names of different heartbeat executors. public static final String FUSE_UPDATE_CHECK = "Fuse update check"; public static final String JOB_MASTER_LOST_WORKER_DETECTION = "Job Master Lost Worker Detection"; + public static final String JOB_MASTER_LOST_MASTER_DETECTION = "Job Master Lost Master Detection"; + public static final String JOB_MASTER_SYNC = "Job Master Sync"; public static final String JOB_WORKER_COMMAND_HANDLING = "Job Worker Command Handling"; public static final String MASTER_THROTTLE = "Master Throttle"; @@ -47,6 +49,7 @@ public final class HeartbeatContext { public static final String MASTER_LOST_FILES_DETECTION = "Master Lost Files Detection"; public static final String MASTER_LOST_MASTER_DETECTION = "Master Lost Master Detection"; public static final String MASTER_LOST_WORKER_DETECTION = "Master Lost Worker Detection"; + public static final String MASTER_LOST_PROXY_DETECTION = "Master Lost Proxy Detection"; public static final String MASTER_METRICS_SYNC = "Master Metrics Sync"; public static final String MASTER_METRICS_TIME_SERIES = "Master Metrics Time Series"; public static final String MASTER_ORPHANED_METRICS_CLEANER = "Master Orphaned Metrics Cleaner"; @@ -59,6 +62,7 @@ public final class HeartbeatContext { public static final String MASTER_UFS_CLEANUP = "Master Ufs Cleanup"; public static final String MASTER_UPDATE_CHECK = "Master Update Check"; public static final String META_MASTER_SYNC = "Meta Master Sync"; + public static final String PROXY_META_MASTER_SYNC = "Proxy MetaMaster Sync"; public static final String WORKER_BLOCK_SYNC = "Worker Block Sync"; public static final String WORKER_CLIENT = "Worker Client"; public static final String WORKER_FILESYSTEM_MASTER_SYNC = "Worker FileSystemMaster Sync"; @@ -124,6 +128,8 @@ private HeartbeatType(int value) { sTimerClasses = new HashMap<>(); sTimerClasses.put(MASTER_THROTTLE,
SLEEPING_TIMER_CLASS); sTimerClasses.put(JOB_MASTER_LOST_WORKER_DETECTION, SLEEPING_TIMER_CLASS); + sTimerClasses.put(JOB_MASTER_LOST_MASTER_DETECTION, SLEEPING_TIMER_CLASS); + sTimerClasses.put(JOB_MASTER_SYNC, SLEEPING_TIMER_CLASS); sTimerClasses.put(JOB_WORKER_COMMAND_HANDLING, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_ACTIVE_UFS_SYNC, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_BLOCK_INTEGRITY_CHECK, SLEEPING_TIMER_CLASS); @@ -136,6 +142,7 @@ private HeartbeatType(int value) { sTimerClasses.put(MASTER_LOST_FILES_DETECTION, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_LOST_MASTER_DETECTION, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_LOST_WORKER_DETECTION, SLEEPING_TIMER_CLASS); + sTimerClasses.put(MASTER_LOST_PROXY_DETECTION, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_METRICS_SYNC, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_METRICS_TIME_SERIES, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_PERSISTENCE_CHECKER, SLEEPING_TIMER_CLASS); @@ -147,6 +154,7 @@ private HeartbeatType(int value) { sTimerClasses.put(MASTER_UFS_CLEANUP, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_UPDATE_CHECK, SLEEPING_TIMER_CLASS); sTimerClasses.put(META_MASTER_SYNC, SLEEPING_TIMER_CLASS); + sTimerClasses.put(PROXY_META_MASTER_SYNC, SLEEPING_TIMER_CLASS); sTimerClasses.put(WORKER_BLOCK_SYNC, SLEEPING_TIMER_CLASS); sTimerClasses.put(WORKER_CLIENT, SLEEPING_TIMER_CLASS); sTimerClasses.put(WORKER_FILESYSTEM_MASTER_SYNC, SLEEPING_TIMER_CLASS); diff --git a/core/common/src/main/java/alluxio/heartbeat/HeartbeatExecutor.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatExecutor.java index a10c4662c5c5..2b8e96ec7532 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatExecutor.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatExecutor.java @@ -15,15 +15,17 @@ /** * An interface for a heartbeat execution. The {@link HeartbeatThread} calls the - * {@link #heartbeat()} method. + * {@link #heartbeat(long)} method. */ public interface HeartbeatExecutor extends Closeable { + /** * Implements the heartbeat logic. * + * @param timeLimitMs time limit in milliseconds this heartbeat should not exceed when running * @throws InterruptedException if the thread is interrupted */ - void heartbeat() throws InterruptedException; + void heartbeat(long timeLimitMs) throws InterruptedException; /** * Cleans up any resources used by the heartbeat executor. 
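With the signature change above, an executor now receives the tick budget computed by the timer and is expected to treat it as a soft deadline. A sketch of the pattern (the class name and batch-processing logic are hypothetical illustrations, not Alluxio code):

import alluxio.heartbeat.HeartbeatExecutor;

public class ExampleExecutor implements HeartbeatExecutor {
  @Override
  public void heartbeat(long timeLimitMs) throws InterruptedException {
    long deadline = System.currentTimeMillis() + timeLimitMs;
    // Process work in small batches and stop before the budget is exhausted,
    // so the next heartbeat is not delayed by an overlong run.
    while (System.currentTimeMillis() < deadline && processOneBatch()) {
      // keep draining work until the deadline or until nothing remains
    }
  }

  private boolean processOneBatch() {
    return false; // placeholder: report whether more work remains
  }

  @Override
  public void close() {
    // release any resources held by this executor
  }
}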
diff --git a/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java index 82a0a504632f..b7fc2342c7ca 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java @@ -12,6 +12,8 @@ package alluxio.heartbeat; import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.Reconfigurable; +import alluxio.conf.ReconfigurableRegistry; import alluxio.security.authentication.AuthenticatedClientUser; import alluxio.security.user.UserState; import alluxio.util.CommonUtils; @@ -23,6 +25,8 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.time.Clock; +import java.util.function.Supplier; import javax.annotation.concurrent.NotThreadSafe; /** @@ -38,6 +42,7 @@ public final class HeartbeatThread implements Runnable { private final UserState mUserState; private HeartbeatTimer mTimer; private AlluxioConfiguration mConfiguration; + private Status mStatus; /** * @param executorName the executor name defined in {@link HeartbeatContext} @@ -65,40 +70,69 @@ public static String generateThreadName(String executorName, String threadId) { * thread's name is a combination of executorName and threadId * @param executor identifies the heartbeat thread executor; an instance of a class that * implements the HeartbeatExecutor interface - * @param intervalMs Sleep time between different heartbeat + * @param intervalSupplier supplier of the sleep interval between heartbeats * @param conf Alluxio configuration * @param userState the user state for this heartbeat thread + * @param clock the clock used to compute the current time */ public HeartbeatThread(String executorName, String threadId, HeartbeatExecutor executor, - long intervalMs, AlluxioConfiguration conf, UserState userState) { + Supplier<SleepIntervalSupplier> intervalSupplier, + AlluxioConfiguration conf, UserState userState, Clock clock) { mThreadName = generateThreadName(executorName, threadId); mExecutor = Preconditions.checkNotNull(executor, "executor"); Class<? extends HeartbeatTimer> timerClass = HeartbeatContext.getTimerClass(executorName); - mTimer = CommonUtils.createNewClassInstance(timerClass, new Class[] {String.class, long.class}, - new Object[] {mThreadName, intervalMs}); + mTimer = CommonUtils.createNewClassInstance(timerClass, + new Class[] {String.class, Clock.class, Supplier.class}, + new Object[] {mThreadName, clock, intervalSupplier}); mConfiguration = conf; mUserState = userState; + mStatus = Status.INIT; + if (mTimer instanceof Reconfigurable) { + ReconfigurableRegistry.register((Reconfigurable) mTimer); + } + } + + /** + * Convenience method for + * {@link + * #HeartbeatThread(String, String, HeartbeatExecutor, Supplier, AlluxioConfiguration, + * UserState, Clock)} where threadId is null.
+ * + * @param executorName the executor name that is one of those defined in {@link HeartbeatContext} + * @param executor the heartbeat executor + * @param intervalSupplier the supplier of the interval between heartbeats + * @param conf the Alluxio configuration + * @param userState the user state for this heartbeat thread + */ + public HeartbeatThread(String executorName, HeartbeatExecutor executor, + Supplier<SleepIntervalSupplier> intervalSupplier, AlluxioConfiguration conf, + UserState userState) { + this(executorName, null, executor, intervalSupplier, conf, userState, Clock.systemUTC()); } /** * Convenience method for * {@link - * #HeartbeatThread(String, String, HeartbeatExecutor, long, AlluxioConfiguration, UserState)} - * where threadId is null. + * #HeartbeatThread(String, String, HeartbeatExecutor, Supplier, AlluxioConfiguration, + * UserState, Clock)} where threadId is null. * * @param executorName the executor name that is one of those defined in {@link HeartbeatContext} * @param executor the heartbeat executor - * @param intervalMs the interval between heartbeats + * @param intervalSupplier the supplier of the interval between heartbeats * @param conf the Alluxio configuration * @param userState the user state for this heartbeat thread + * @param clock the clock used to compute the current time */ - public HeartbeatThread(String executorName, HeartbeatExecutor executor, long intervalMs, - AlluxioConfiguration conf, UserState userState) { - this(executorName, null, executor, intervalMs, conf, userState); + public HeartbeatThread(String executorName, HeartbeatExecutor executor, + Supplier<SleepIntervalSupplier> intervalSupplier, + AlluxioConfiguration conf, UserState userState, Clock clock) { + this(executorName, null, executor, intervalSupplier, + conf, userState, clock); } @Override public void run() { + long counter = 0L; try { if (SecurityUtils.isSecurityEnabled(mConfiguration) && AuthenticatedClientUser.get(mConfiguration) == null) { @@ -114,24 +148,39 @@ public void run() { // Thread.interrupted() clears the interrupt status. Do not call interrupt again to clear it. while (!Thread.interrupted()) { // TODO(peis): Fix this. The current implementation consumes one thread even when ticking. - mTimer.tick(); - mExecutor.heartbeat(); + mStatus = Status.WAITING; + long limitTime = mTimer.tick(); + mStatus = Status.RUNNING; + LOG.debug("{} #{} will run, limited to {}s", mThreadName, counter++, limitTime / 1000); + mExecutor.heartbeat(limitTime); } } catch (InterruptedException e) { // Allow thread to exit. } catch (Exception e) { LOG.error("Uncaught exception in heartbeat executor, Heartbeat Thread shutting down", e); } finally { + mStatus = Status.STOPPED; + if (mTimer instanceof Reconfigurable) { + ReconfigurableRegistry.unregister((Reconfigurable) mTimer); + } mExecutor.close(); } } /** - * Updates the heartbeat interval. - * - * @param intervalMs the heartbeat interval in ms + * @return the status of the current heartbeat thread + */ + public Status getStatus() { + return mStatus; + } + + /** + * Enum representing the status of HeartbeatThread.
*/ - public void updateIntervalMs(long intervalMs) { - mTimer.setIntervalMs(intervalMs); + public enum Status { + INIT, + WAITING, + RUNNING, + STOPPED, } } diff --git a/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java index e68738f4799b..cd847922ef33 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java @@ -16,19 +16,12 @@ */ public interface HeartbeatTimer { - /** - * Sets the heartbeat interval. - * - * @param intervalMs the heartbeat interval in ms - */ - default void setIntervalMs(long intervalMs) { - throw new UnsupportedOperationException("Setting interval is not supported"); - } - /** * Waits until next heartbeat should be executed. * + * @return time limit in milliseconds for this heartbeat action to run for before + * the next heartbeat is due. * @throws InterruptedException if the thread is interrupted while waiting */ - void tick() throws InterruptedException; + long tick() throws InterruptedException; } diff --git a/core/common/src/main/java/alluxio/heartbeat/ScheduledTimer.java b/core/common/src/main/java/alluxio/heartbeat/ScheduledTimer.java index 62b6d5667d83..cff75372105c 100644 --- a/core/common/src/main/java/alluxio/heartbeat/ScheduledTimer.java +++ b/core/common/src/main/java/alluxio/heartbeat/ScheduledTimer.java @@ -15,9 +15,11 @@ import com.google.common.base.Preconditions; +import java.time.Clock; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Supplier; import javax.annotation.concurrent.ThreadSafe; /** @@ -46,9 +48,11 @@ public final class ScheduledTimer implements HeartbeatTimer { * Creates a new instance of {@link ScheduledTimer}. * * @param threadName the thread name - * @param intervalMs the heartbeat interval (unused) + * @param clock for telling the current time (unused) + * @param intervalSupplierSupplier Sleep time between different heartbeat supplier */ - public ScheduledTimer(String threadName, long intervalMs) { + public ScheduledTimer(String threadName, Clock clock, + Supplier intervalSupplierSupplier) { mThreadName = threadName; mLock = new ReentrantLock(); mTickCondition = mLock.newCondition(); @@ -77,7 +81,7 @@ protected void schedule() { } @Override - public void tick() throws InterruptedException { + public long tick() throws InterruptedException { try (LockResource r = new LockResource(mLock)) { HeartbeatScheduler.addTimer(this); // Wait in a loop to handle spurious wakeups @@ -87,5 +91,6 @@ public void tick() throws InterruptedException { mScheduled = false; } + return Long.MAX_VALUE; } } diff --git a/core/common/src/main/java/alluxio/heartbeat/SleepIntervalSupplier.java b/core/common/src/main/java/alluxio/heartbeat/SleepIntervalSupplier.java new file mode 100644 index 000000000000..b022839f3229 --- /dev/null +++ b/core/common/src/main/java/alluxio/heartbeat/SleepIntervalSupplier.java @@ -0,0 +1,34 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.heartbeat; + +/** + * A policy to calculate the next interval to sleep. + */ +public interface SleepIntervalSupplier { + /** + * Gets the next interval for sleeping. + * + * @param previousTickedMs the previous tick timestamp in milliseconds + * @param nowTimeStampMillis the current timestamp in milliseconds + * @return the interval to sleep, starting from now, before the timer next triggers + */ + long getNextInterval(long previousTickedMs, long nowTimeStampMillis); + + /** + * Gets the run limit starting from the previous tick. + * + * @param previousTickedMs the previous tick timestamp in milliseconds + * @return the run limit + */ + long getRunLimit(long previousTickedMs); +} diff --git a/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java b/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java index 627ef78dfd56..0b3bf64cdf0c 100644 --- a/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java +++ b/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java @@ -11,61 +11,64 @@ package alluxio.heartbeat; -import alluxio.clock.SystemClock; +import alluxio.conf.PropertyKey; +import alluxio.conf.Reconfigurable; import alluxio.time.Sleeper; -import alluxio.time.ThreadSleeper; +import alluxio.time.SteppingThreadSleeper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.time.Clock; import java.time.Duration; +import java.util.Map; +import java.util.Objects; +import java.util.function.Supplier; import javax.annotation.concurrent.NotThreadSafe; /** * This class can be used for executing heartbeats periodically. */ @NotThreadSafe -public final class SleepingTimer implements HeartbeatTimer { - private long mIntervalMs; - private long mPreviousTickMs; +public class SleepingTimer implements HeartbeatTimer, Reconfigurable { + protected long mPreviousTickedMs = -1; private final String mThreadName; - private final Logger mLogger; - private final Clock mClock; - private final Sleeper mSleeper; + protected final Logger mLogger; + protected final Clock mClock; + protected final Sleeper mSleeper; + protected final Supplier<SleepIntervalSupplier> mIntervalSupplierSupplier; + protected volatile SleepIntervalSupplier mIntervalSupplier; /** * Creates a new instance of {@link SleepingTimer}. * * @param threadName the thread name - * @param intervalMs the heartbeat interval + * @param clock for telling the current time + * @param intervalSupplierSupplier the supplier of the heartbeat interval supplier */ - public SleepingTimer(String threadName, long intervalMs) { - this(threadName, intervalMs, LoggerFactory.getLogger(SleepingTimer.class), - new SystemClock(), ThreadSleeper.INSTANCE); + public SleepingTimer(String threadName, Clock clock, + Supplier<SleepIntervalSupplier> intervalSupplierSupplier) { + this(threadName, LoggerFactory.getLogger(SleepingTimer.class), + clock, SteppingThreadSleeper.INSTANCE, intervalSupplierSupplier); } /** * Creates a new instance of {@link SleepingTimer}.
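To make the new contract concrete, here is a minimal fixed-interval implementation of SleepIntervalSupplier. This is an illustrative sketch only; FixedSleepIntervalSupplier is a hypothetical name, not part of this change. It mirrors the behavior of the old intervalMs field and assumes the timer passes -1 as previousTickedMs before the first tick, as SleepingTimer below does.

package alluxio.heartbeat;

/**
 * Hypothetical sketch (not part of this change): a fixed-interval
 * SleepIntervalSupplier reproducing the old intervalMs behavior.
 */
public class FixedSleepIntervalSupplier implements SleepIntervalSupplier {
  private final long mIntervalMs;

  public FixedSleepIntervalSupplier(long intervalMs) {
    mIntervalMs = intervalMs;
  }

  @Override
  public long getNextInterval(long previousTickedMs, long nowTimeStampMillis) {
    if (previousTickedMs < 0) {
      return 0; // no previous tick yet: fire immediately
    }
    // Sleep only for the part of the interval not consumed by the previous run.
    return Math.max(0, mIntervalMs - (nowTimeStampMillis - previousTickedMs));
  }

  @Override
  public long getRunLimit(long previousTickedMs) {
    // The executor may run for at most one interval before the next tick is due.
    return mIntervalMs;
  }
}

A heartbeat thread is then constructed with a Supplier of such objects, which SleepingTimer re-invokes whenever configuration changes (see update() below). The property key in this sketch is purely illustrative:

Supplier<SleepIntervalSupplier> intervalSupplier = () -> new FixedSleepIntervalSupplier(
    conf.getMs(PropertyKey.MASTER_LOST_WORKER_DETECTION_INTERVAL)); // illustrative key
HeartbeatThread heartbeat = new HeartbeatThread(
    HeartbeatContext.MASTER_LOST_WORKER_DETECTION, executor, intervalSupplier, conf, userState);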
 * * @param threadName the thread name - * @param intervalMs the heartbeat interval * @param logger the logger to log to * @param clock for telling the current time * @param sleeper the utility to use for sleeping + * @param intervalSupplierSupplier the supplier of the heartbeat interval supplier */ - public SleepingTimer(String threadName, long intervalMs, Logger logger, Clock clock, - Sleeper sleeper) { - mIntervalMs = intervalMs; + public SleepingTimer(String threadName, Logger logger, Clock clock, Sleeper sleeper, + Supplier<SleepIntervalSupplier> intervalSupplierSupplier) { mThreadName = threadName; mLogger = logger; mClock = clock; mSleeper = sleeper; - } - - @Override - public void setIntervalMs(long intervalMs) { - mIntervalMs = intervalMs; + mIntervalSupplierSupplier = intervalSupplierSupplier; + mIntervalSupplier = intervalSupplierSupplier.get(); } /** @@ -74,16 +77,25 @@ public void setIntervalMs(long intervalMs) { * @throws InterruptedException if the thread is interrupted while waiting */ @Override - public void tick() throws InterruptedException { - if (mPreviousTickMs != 0) { - long executionTimeMs = mClock.millis() - mPreviousTickMs; - if (executionTimeMs > mIntervalMs) { - mLogger.warn("{} last execution took {} ms. Longer than the interval {}", mThreadName, - executionTimeMs, mIntervalMs); - } else { - mSleeper.sleep(Duration.ofMillis(mIntervalMs - executionTimeMs)); - } + public long tick() throws InterruptedException { + long now = mClock.millis(); + mSleeper.sleep( + () -> Duration.ofMillis(mIntervalSupplier.getNextInterval(mPreviousTickedMs, now))); + mPreviousTickedMs = mClock.millis(); + return mIntervalSupplier.getRunLimit(mPreviousTickedMs); + } + + @Override + public void update(Map<PropertyKey, Object> changedProperties) { + update(); + } + + @Override + public void update() { + SleepIntervalSupplier newSupplier = mIntervalSupplierSupplier.get(); + if (!Objects.equals(mIntervalSupplier, newSupplier)) { + mIntervalSupplier = newSupplier; + mLogger.info("update {} interval supplier.", mThreadName); } - mPreviousTickMs = mClock.millis(); } } diff --git a/core/common/src/main/java/alluxio/job/CopyJobRequest.java b/core/common/src/main/java/alluxio/job/CopyJobRequest.java new file mode 100644 index 000000000000..598cb72f9fe7 --- /dev/null +++ b/core/common/src/main/java/alluxio/job/CopyJobRequest.java @@ -0,0 +1,82 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.job; + +import alluxio.grpc.CopyJobPOptions; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; + +import javax.annotation.concurrent.ThreadSafe; + +/** + * The request of copying files.
+ */ +@ThreadSafe +public class CopyJobRequest implements JobRequest { + private static final String TYPE = "copy"; + private static final long serialVersionUID = -8565405317284410500L; + private final String mDst; + private final CopyJobPOptions mOptions; + private final String mSrc; + + /** + * @param src the source file path + * @param dst the destination file path + * @param options copy job options + **/ + public CopyJobRequest(@JsonProperty("src") String src, + @JsonProperty("dst") String dst, + @JsonProperty("copyJobPOptions") CopyJobPOptions options) { + mSrc = Preconditions.checkNotNull(src, "The source path cannot be null"); + + mDst = Preconditions.checkNotNull(dst, "The destination path cannot be null"); + mOptions = Preconditions.checkNotNull(options, "The job options cannot be null"); + } + + /** + * @return the source file path + */ + public String getSrc() { + return mSrc; + } + + /** + * @return the destination file path + */ + public String getDst() { + return mDst; + } + + /** + * @return job options + */ + public CopyJobPOptions getOptions() { + return mOptions; + } + + @Override + public String toString() { + return MoreObjects + .toStringHelper(this) + .add("Src", mSrc) + .add("Dst", mDst) + .add("Options", mOptions) + .toString(); + } + + @Override + public String getType() { + return TYPE; + } +} diff --git a/core/common/src/main/java/alluxio/job/JobDescription.java b/core/common/src/main/java/alluxio/job/JobDescription.java new file mode 100644 index 000000000000..4ab98594a202 --- /dev/null +++ b/core/common/src/main/java/alluxio/job/JobDescription.java @@ -0,0 +1,123 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.job; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Objects; + +/** + * Job description that is used as a key to identify the job in the scheduler. + */ +public class JobDescription { + + private final String mPath; + private final String mType; + + private JobDescription(String type, String path) { + mPath = path; + mType = type; + } + + /** + * @return the type of the job + */ + public String getType() { + return mType; + } + + /** + * @return the path affected by the job + */ + public String getPath() { + return mPath; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + JobDescription that = (JobDescription) o; + return Objects.equal(mPath, that.mPath) && Objects.equal(mType, that.mType); + } + + @Override + public int hashCode() { + return Objects.hashCode(mType, mPath); + } + + @Override + public String toString() { + return MoreObjects + .toStringHelper(this) + .add("Path", mPath) + .add("Type", mType) + .toString(); + } + + /** + * Creates a job description from the JobDescription proto.
+ * @param jobDescription JobDescription proto + * @return job description + */ + public static JobDescription from(alluxio.grpc.JobDescription jobDescription) { + return new JobDescription(jobDescription.getType(), jobDescription.getPath()); + } + + /** + * @return the job description builder + */ + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Builder for {@link JobDescription}. + */ + public static class Builder { + private String mPath; + private String mType; + + private Builder() {} + + /** + * Sets the path. + * @param path the affected path + * @return the builder + */ + public Builder setPath(String path) { + mPath = path; + return this; + } + + /** + * Sets the job type. + * @param type the job type + * @return the builder + */ + public Builder setType(String type) { + mType = type; + return this; + } + + /** + * Builds the job description. + * @return the job description + */ + public JobDescription build() { + return new JobDescription(mType, mPath); + } + } +} diff --git a/core/common/src/main/java/alluxio/job/JobRequest.java b/core/common/src/main/java/alluxio/job/JobRequest.java new file mode 100644 index 000000000000..01d8eaf3861c --- /dev/null +++ b/core/common/src/main/java/alluxio/job/JobRequest.java @@ -0,0 +1,31 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.job; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonTypeInfo; + +import java.io.Serializable; + +/** + * A job request that can be used to create a job. All the subclasses are both Java and JSON + * serializable. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@type") +public interface JobRequest extends Serializable { + + /** + * @return the type of the job + */ + String getType(); +} diff --git a/core/common/src/main/java/alluxio/job/LoadJobRequest.java b/core/common/src/main/java/alluxio/job/LoadJobRequest.java new file mode 100644 index 000000000000..818d4e75e9d0 --- /dev/null +++ b/core/common/src/main/java/alluxio/job/LoadJobRequest.java @@ -0,0 +1,69 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.job; + +import alluxio.grpc.LoadJobPOptions; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; + +import javax.annotation.concurrent.ThreadSafe; + +/** + * The request of loading files.
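As a usage sketch for the request and description classes above (paths and options are illustrative, and keying a copy job on its source path is an assumption, not something this change prescribes):

CopyJobRequest copy = new CopyJobRequest("/src/dir", "/dst/dir",
    CopyJobPOptions.getDefaultInstance());
// The scheduler identifies the resulting job by its (type, path) pair.
JobDescription description = JobDescription.newBuilder()
    .setType(copy.getType()) // "copy"
    .setPath(copy.getSrc())  // assumed key for a copy job
    .build();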
+ */ +@ThreadSafe +public class LoadJobRequest implements JobRequest { + private static final String TYPE = "load"; + private static final long serialVersionUID = -4100882786127020489L; + private final String mPath; + private final LoadJobPOptions mOptions; + + /** + * @param path the file path + * @param options load job options + **/ + public LoadJobRequest(@JsonProperty("path") String path, + @JsonProperty("loadJobPOptions") LoadJobPOptions options) { + mPath = Preconditions.checkNotNull(path, "The file path cannot be null"); + mOptions = Preconditions.checkNotNull(options, "The load job options cannot be null"); + } + + /** + * @return the file path + */ + public String getPath() { + return mPath; + } + + /** + * @return job options + */ + public LoadJobPOptions getOptions() { + return mOptions; + } + + @Override + public String toString() { + return MoreObjects + .toStringHelper(this) + .add("Path", mPath) + .add("Options", mOptions) + .toString(); + } + + @Override + public String getType() { + return TYPE; + } +} diff --git a/core/common/src/main/java/alluxio/master/PollingMasterInquireClient.java b/core/common/src/main/java/alluxio/master/PollingMasterInquireClient.java index 96875ec915c0..849a154ad5af 100644 --- a/core/common/src/main/java/alluxio/master/PollingMasterInquireClient.java +++ b/core/common/src/main/java/alluxio/master/PollingMasterInquireClient.java @@ -18,6 +18,7 @@ import alluxio.exception.status.AlluxioStatusException; import alluxio.exception.status.CancelledException; import alluxio.exception.status.DeadlineExceededException; +import alluxio.exception.status.NotFoundException; import alluxio.exception.status.UnavailableException; import alluxio.grpc.GetServiceVersionPRequest; import alluxio.grpc.GrpcChannel; @@ -32,14 +33,21 @@ import alluxio.uri.MultiMasterAuthority; import com.google.common.collect.Lists; +import com.google.common.util.concurrent.ThreadFactoryBuilder; import io.grpc.StatusRuntimeException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.net.InetSocketAddress; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Objects; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; import javax.annotation.Nullable; @@ -51,6 +59,13 @@ */ public class PollingMasterInquireClient implements MasterInquireClient { private static final Logger LOG = LoggerFactory.getLogger(PollingMasterInquireClient.class); + private static final ExecutorService EXECUTOR_SERVICE = + Executors.newCachedThreadPool( + new ThreadFactoryBuilder() + .setDaemon(true) + .setNameFormat("pollingMasterThread-%d") + .build() + ); private final MultiMasterConnectDetails mConnectDetails; private final Supplier<RetryPolicy> mRetryPolicySupplier; @@ -128,27 +143,81 @@ private InetSocketAddress getAddress() { addresses = mConnectDetails.getAddresses(); } + if (mConfiguration.getBoolean(PropertyKey.USER_MASTER_POLLING_CONCURRENT)) { + return findActiveAddressConcurrent(addresses); + } else { + return findActiveAddress(addresses); + } + } + + @Nullable + private InetSocketAddress findActiveAddressConcurrent(List<InetSocketAddress> addresses) { + List<Future<InetSocketAddress>> futures = new ArrayList<>(addresses.size()); + try { + ExecutorCompletionService<InetSocketAddress> completionService = + new ExecutorCompletionService<>(EXECUTOR_SERVICE); + for 
(InetSocketAddress address : addresses) { + futures.add(completionService.submit(() -> checkActiveAddress(address))); + } + for (int i = 0; i < addresses.size(); i++) { + try { + Future<InetSocketAddress> future = completionService.take(); + InetSocketAddress address = future.get(); + if (address != null) { + return address; + } + } catch (InterruptedException | ExecutionException e) { + break; + } + } + return null; + } finally { + futures.forEach(it -> it.cancel(true)); + } + } + + @Nullable + private InetSocketAddress findActiveAddress(List<InetSocketAddress> addresses) { for (InetSocketAddress address : addresses) { try { - LOG.debug("Checking whether {} is listening for RPCs", address); - pingMetaService(address); - LOG.debug("Successfully connected to {}", address); - return address; - } catch (UnavailableException e) { - LOG.debug("Failed to connect to {}", address); - } catch (DeadlineExceededException e) { - LOG.debug("Timeout while connecting to {}", address); - } catch (CancelledException e) { - LOG.debug("Cancelled while connecting to {}", address); + if (checkActiveAddress(address) != null) { + return address; + } } catch (AlluxioStatusException e) { - LOG.error("Error while connecting to {}. {}", address, e); - // Breaking the loop on non filtered error. break; } } return null; } + private InetSocketAddress checkActiveAddress(InetSocketAddress address) + throws AlluxioStatusException { + try { + LOG.debug("Checking whether {} is listening for RPCs", address); + pingMetaService(address); + LOG.debug("Successfully connected to {}", address); + return address; + } catch (UnavailableException e) { + LOG.debug("Failed to connect to {}", address); + return null; + } catch (DeadlineExceededException e) { + LOG.debug("Timeout while connecting to {}", address); + return null; + } catch (CancelledException e) { + LOG.debug("Cancelled while connecting to {}", address); + return null; + } catch (NotFoundException e) { + // If the gRPC server is enabled but the metadata service isn't enabled, + // try the next master address. + LOG.debug("Meta service rpc endpoint not found on {}. {}", address, e); + return null; + } catch (AlluxioStatusException e) { + LOG.error("Error while connecting to {}. {}", address, e); + // Breaking the loop on non filtered error. + throw e; + } + } + private void pingMetaService(InetSocketAddress address) throws AlluxioStatusException { // disable authentication in the channel since version service does not require authentication GrpcChannel channel = diff --git a/core/common/src/main/java/alluxio/master/WorkerState.java b/core/common/src/main/java/alluxio/master/WorkerState.java new file mode 100644 index 000000000000..8b4572fa7cde --- /dev/null +++ b/core/common/src/main/java/alluxio/master/WorkerState.java @@ -0,0 +1,32 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master; + +/** + * The worker state maintained by the master.
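The concurrent path in PollingMasterInquireClient above is a submit-all, take-first-success pattern. Stripped of the Alluxio specifics, its shape is roughly the following sketch, where probe is a hypothetical stand-in for checkActiveAddress:

InetSocketAddress firstResponsive(List<InetSocketAddress> candidates, ExecutorService executor)
    throws InterruptedException, ExecutionException {
  ExecutorCompletionService<InetSocketAddress> ecs = new ExecutorCompletionService<>(executor);
  List<Future<InetSocketAddress>> futures = new ArrayList<>();
  for (InetSocketAddress candidate : candidates) {
    futures.add(ecs.submit(() -> probe(candidate) ? candidate : null));
  }
  try {
    for (int i = 0; i < candidates.size(); i++) {
      InetSocketAddress winner = ecs.take().get(); // completion order, not submission order
      if (winner != null) {
        return winner; // first responsive master wins
      }
    }
    return null; // nobody answered
  } finally {
    futures.forEach(f -> f.cancel(true)); // stop the slower probes
  }
}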
+ */ +public enum WorkerState { + LIVE("ACTIVE"), + LOST("LOST"), + DECOMMISSIONED("Decommissioned"), + DISABLED("Disabled"); + private final String mState; + + WorkerState(String s) { + mState = s; + } + + @Override + public String toString() { + return mState; + } +} diff --git a/core/common/src/main/java/alluxio/master/metastore/rocks/RocksExclusiveLockHandle.java b/core/common/src/main/java/alluxio/master/metastore/rocks/RocksExclusiveLockHandle.java new file mode 100644 index 000000000000..c742012466f8 --- /dev/null +++ b/core/common/src/main/java/alluxio/master/metastore/rocks/RocksExclusiveLockHandle.java @@ -0,0 +1,48 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore.rocks; + +import alluxio.exception.runtime.AlluxioRuntimeException; + +import java.util.concurrent.Callable; + +/** + * This is a handle used to manage the write lock (exclusive lock) on RocksStore. + * The exclusive lock is acquired when the ref count is zero, and the StopServing flag ensures + * no new r/w will come in, so the ref count will stay zero throughout the period. + * + * One exception is when the exclusive lock is forced (ignoring uncompleted r/w operations): + * a late reader may then come back while the exclusive lock is already held, in which case + * it should not update the ref count anymore. See the Javadoc on + * {@link RocksSharedLockHandle#close()} for how that is handled. + */ +public class RocksExclusiveLockHandle implements AutoCloseable { + private final Callable<Void> mCloseAction; + + /** + * The constructor. + * @param closeAction the action called on close + */ + public RocksExclusiveLockHandle(Callable<Void> closeAction) { + mCloseAction = closeAction; + } + + @Override + public void close() { + try { + mCloseAction.call(); + } catch (Exception e) { + // From the current usage in RocksStore, this is unreachable + throw AlluxioRuntimeException.from(e); + } + } +} diff --git a/core/common/src/main/java/alluxio/master/metastore/rocks/RocksSharedLockHandle.java b/core/common/src/main/java/alluxio/master/metastore/rocks/RocksSharedLockHandle.java new file mode 100644 index 000000000000..c38504caa6fc --- /dev/null +++ b/core/common/src/main/java/alluxio/master/metastore/rocks/RocksSharedLockHandle.java @@ -0,0 +1,59 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore.rocks; + +import java.util.concurrent.atomic.LongAdder; + +/** + * This is a handle used to manage a read lock (shared lock) on RocksStore. + * When the shared lock is held, exclusive locks will wait.
That guarantees the RocksDB + is not wiped out/closed while an r/w operation is active. + * + * RocksStore uses ref count for locking so releasing a read lock is just decrementing the + * reference count. + */ +public class RocksSharedLockHandle implements AutoCloseable { + private final int mDbVersion; + private final LongAdder mRefCount; + + /** + * The constructor. + * + * @param dbVersion The RocksDB version. This version is updated when the RocksDB + * is restored or wiped out. + * @param refCount the ref count to decrement on close + */ + public RocksSharedLockHandle(int dbVersion, LongAdder refCount) { + mDbVersion = dbVersion; + mRefCount = refCount; + } + + /** + * Gets the version on the lock. + * @return version + */ + public int getLockVersion() { + return mDbVersion; + } + + @Override + public void close() { + /* + * If the exclusive lock has been forced and the ref count is reset, this reference will point + * to an out-of-date counter. Therefore, we can just update this counter without concerns. + * If the exclusive lock has NOT been forced, we decrement the ref count normally. + * If the exclusive lock has been forced, we decrement an irrelevant counter which will never + * be read. + */ + mRefCount.decrement(); + } +} diff --git a/core/common/src/main/java/alluxio/master/metastore/rocks/RocksUtils.java b/core/common/src/main/java/alluxio/master/metastore/rocks/RocksUtils.java index 493787f00850..cdada2019182 100644 --- a/core/common/src/main/java/alluxio/master/metastore/rocks/RocksUtils.java +++ b/core/common/src/main/java/alluxio/master/metastore/rocks/RocksUtils.java @@ -20,6 +20,7 @@ import java.util.Iterator; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; /** * Convenience methods for working with RocksDB. @@ -94,13 +95,31 @@ public interface RocksIteratorParser { * Used to wrap an {@link CloseableIterator} over {@link RocksIterator}. * It seeks given iterator to first entry before returning the iterator. * + * The Iterator is associated with a shared lock to the RocksStore. The lock should be acquired + * by the caller (see the Javadoc on RocksStore.checkAndAcquireSharedLock() for how). + * The lock is held throughout the lifecycle of this iterator until it is closed, + * either on completion or on exception. This shared lock guarantees thread safety when + * accessing the RocksDB. In other words, when this shared lock is held, the underlying + * RocksDB will not be stopped/restarted. + * + * The abortCheck defines a way to voluntarily abort the iteration. This typically happens + * when the underlying RocksDB will be closed/restarted/checkpointed, where all accesses should + * be stopped. + * + * With the thread safety baked into hasNext() and next(), users of this Iterator do not need + * to worry about safety and can use this Iterator normally. + * See RocksBlockMetaStore and RocksInodeStore for examples of how this iterator is used.
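A sketch of the intended call pattern for the shared lock and the wrapped iterator: checkAndAcquireSharedLock follows the Javadoc above, while abortIfClosing, the parser lambda, and process are hypothetical stand-ins.

RocksSharedLockHandle lock = rocksStore.checkAndAcquireSharedLock();
try (CloseableIterator<Long> ids = RocksUtils.createCloseableIterator(
    rocksIterator,
    iter -> Longs.fromByteArray(iter.key()),             // parser: rocks entry -> T
    () -> { rocksStore.abortIfClosing(); return null; }, // abortCheck: throws to abort
    lock)) {
  while (ids.hasNext()) {
    process(ids.next());
  }
} // closing the iterator closes the RocksIterator, then releases the shared lock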
+ * + * @param rocksIterator the rocks iterator + * @param parser parser to produce iterated values from rocks key-value + * @param <T> the iterator value type + * @param abortCheck invoked before each step; it aborts the iteration by throwing an exception + * @param rocksDbSharedLock the shared lock acquired by the iterator + * @return wrapped iterator */ public static <T> CloseableIterator<T> createCloseableIterator( - RocksIterator rocksIterator, RocksIteratorParser<T> parser) { + RocksIterator rocksIterator, RocksIteratorParser<T> parser, + Supplier<Void> abortCheck, RocksSharedLockHandle rocksDbSharedLock) { rocksIterator.seekToFirst(); AtomicBoolean valid = new AtomicBoolean(true); Iterator<T> iter = new Iterator<T>() { @@ -111,23 +130,41 @@ public boolean hasNext() { @Override public T next() { + boolean succeeded = false; + + /* + * If the RocksDB wants to stop, abort the loop instead of finishing it. + * The abortCheck will throw an exception, which closes the CloseableIterator + * if the CloseableIterator is correctly put in a try-with-resources block. + */ + abortCheck.get(); + try { - return parser.next(rocksIterator); + T result = parser.next(rocksIterator); + rocksIterator.next(); + succeeded = true; + return result; } catch (Exception exc) { LOG.warn("Iteration aborted because of error", exc); - rocksIterator.close(); - valid.set(false); throw new RuntimeException(exc); } finally { - rocksIterator.next(); - if (!rocksIterator.isValid()) { - rocksIterator.close(); + if (!succeeded) { valid.set(false); + rocksIterator.close(); } } } }; - return CloseableIterator.create(iter, (whatever) -> rocksIterator.close()); + return CloseableIterator.create(iter, (whatever) -> { + try { + rocksIterator.close(); + } finally { + if (rocksDbSharedLock != null) { + // Release the lock after recycling the iterator safely + rocksDbSharedLock.close(); + } + } + }); } } diff --git a/core/common/src/main/java/alluxio/master/selectionpolicy/MasterSelectionPolicy.java b/core/common/src/main/java/alluxio/master/selectionpolicy/MasterSelectionPolicy.java index cbfe753822e3..855914bcc291 100644 --- a/core/common/src/main/java/alluxio/master/selectionpolicy/MasterSelectionPolicy.java +++ b/core/common/src/main/java/alluxio/master/selectionpolicy/MasterSelectionPolicy.java @@ -21,6 +21,16 @@ * determines which master node a client should connect to. */ public interface MasterSelectionPolicy { + /** + * The enum for master selection policies. + */ + enum Type { + PRIMARY_MASTER, + ANY_STANDBY_MASTER, + ANY_MASTER, + SPECIFIED_MASTER, + } + /** * Get and cache the primary master address. * @@ -48,6 +58,11 @@ InetSocketAddress getGrpcMasterAddress(MasterInquireClient masterInquireClient) */ void resetPrimaryMasterAddressCache(); + /** + * @return the type of the master selection policy + */ + Type getType(); + /** * Factory for {@link MasterSelectionPolicy}.
*/ diff --git a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyMaster.java b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyMaster.java index c9d0215d80d1..c76709613ccb 100644 --- a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyMaster.java +++ b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyMaster.java @@ -36,4 +36,9 @@ public synchronized InetSocketAddress getGrpcMasterAddress( Collections.shuffle(masterAddresses); return masterAddresses.get(0); } + + @Override + public Type getType() { + return Type.ANY_MASTER; + } } diff --git a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyStandbyMaster.java b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyStandbyMaster.java index 1ef1a299ccc1..8e23504c5892 100644 --- a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyStandbyMaster.java +++ b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyStandbyMaster.java @@ -65,4 +65,9 @@ public synchronized InetSocketAddress getGrpcMasterAddress( } throw new UnavailableException("No standby masters available"); } + + @Override + public Type getType() { + return Type.ANY_STANDBY_MASTER; + } } diff --git a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyPrimaryMaster.java b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyPrimaryMaster.java index 7f8b07a4153d..8b31380a37cb 100644 --- a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyPrimaryMaster.java +++ b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyPrimaryMaster.java @@ -28,4 +28,9 @@ public synchronized InetSocketAddress getGrpcMasterAddress( mPrimaryMasterAddress = masterInquireClient.getPrimaryRpcAddress(); return mPrimaryMasterAddress; } + + @Override + public Type getType() { + return Type.PRIMARY_MASTER; + } } diff --git a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicySpecifiedMaster.java b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicySpecifiedMaster.java index e1c765a54159..ddefb397f749 100644 --- a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicySpecifiedMaster.java +++ b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicySpecifiedMaster.java @@ -36,5 +36,10 @@ public synchronized InetSocketAddress getGrpcMasterAddress( MasterInquireClient masterInquireClient) throws UnavailableException { return mMasterAddressToConnect; } + + @Override + public Type getType() { + return Type.SPECIFIED_MASTER; + } } diff --git a/core/common/src/main/java/alluxio/metrics/MetricKey.java b/core/common/src/main/java/alluxio/metrics/MetricKey.java index c75723028cef..8e40eefef70a 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricKey.java +++ b/core/common/src/main/java/alluxio/metrics/MetricKey.java @@ -322,12 +322,14 @@ public static String getSyncMetricName(long mountId) { // Master file statistics public static final MetricKey MASTER_FILES_PINNED = new Builder("Master.FilesPinned") - .setDescription("Total number of currently pinned files") + .setDescription("Total number of currently pinned files. 
" + + "Note that IDs for these files are stored in memory.") .setMetricType(MetricType.GAUGE) .build(); public static final MetricKey MASTER_FILES_TO_PERSIST = new Builder("Master.FilesToBePersisted") - .setDescription("Total number of currently to be persisted files") + .setDescription("Total number of currently to be persisted files." + + " Note that the IDs for these files are stored in memory.") .setMetricType(MetricType.GAUGE) .build(); public static final MetricKey MASTER_FILE_SIZE = @@ -408,6 +410,11 @@ public static String getSyncMetricName(long mountId) { .setDescription("Total number of unique blocks in Alluxio") .setMetricType(MetricType.GAUGE) .build(); + public static final MetricKey MASTER_CACHED_BLOCK_LOCATIONS = + new Builder("Master.CachedBlockLocations") + .setDescription("Total number of cached block locations") + .setMetricType(MetricType.GAUGE) + .build(); public static final MetricKey MASTER_TOTAL_RPCS = new Builder("Master.TotalRpcs") .setDescription("Throughput of master RPC calls. This metrics indicates how busy the" @@ -420,6 +427,20 @@ public static String getSyncMetricName(long mountId) { .setDescription("Total number of block replicas in Alluxio") .setMetricType(MetricType.GAUGE) .build(); + public static final MetricKey MASTER_TTL_BUCKETS = + new Builder("Master.TTLBuckets") + .setDescription("The number of TTL buckets at the master. Note that these buckets" + + " are stored in memory.") + .setMetricType(MetricType.GAUGE) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_TTL_INODES = + new Builder("Master.TTLInodes") + .setDescription("The total number of inodes contained in TTL buckets at the mater." + + " Note that these inodes are stored in memory.") + .setMetricType(MetricType.GAUGE) + .setIsClusterAggregated(false) + .build(); public static final MetricKey MASTER_INODE_HEAP_SIZE = new Builder("Master.InodeHeapSize") .setDescription("An estimate of the inode heap size") @@ -617,6 +638,13 @@ public static String getSyncMetricName(long mountId) { .setDescription("Total number of Mount operations") .setMetricType(MetricType.COUNTER) .build(); + public static final MetricKey MASTER_REPLICATION_LIMITED_FILES = + new Builder("Master.ReplicationLimitedFiles") + .setDescription("Number of files that have a replication count set to a " + + "non-default value. 
Note that these files have IDs that are stored " + + "in memory.") + .setMetricType(MetricType.COUNTER) + .build(); public static final MetricKey MASTER_RENAME_PATH_OPS = new Builder("Master.RenamePathOps") .setDescription("Total number of Rename operations") .setMetricType(MetricType.COUNTER) .build(); @@ -687,6 +715,109 @@ public static String getSyncMetricName(long mountId) { .setMetricType(MetricType.TIMER) .build(); + // Metadata sync v2 metrics + public static final MetricKey MASTER_METADATA_SYNC_QUEUED_LOADS = + new Builder("Master.MetadataSyncV2QueuedLoads") + .setDescription("Total number of load requests that are pending") + .setMetricType(MetricType.GAUGE) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_RUNNING_LOADS = + new Builder("Master.MetadataSyncV2RunningLoads") + .setDescription("The number of load requests that are in progress or" + + " have completed, but not yet been processed") + .setMetricType(MetricType.GAUGE) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_RUNNING_TASKS = + new Builder("Master.MetadataSyncV2RunningTasks") + .setDescription("The number of metadata sync tasks currently running") + .setMetricType(MetricType.GAUGE) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_COMPLETED_TASKS = + new Builder("Master.MetadataSyncV2CompletedTasks") + .setDescription("The number of completed metadata sync tasks") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FAILED_TASKS = + new Builder("Master.MetadataSyncV2FailedTasks") + .setDescription("The number of failed metadata sync tasks") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_CANCELLED_TASKS = + new Builder("Master.MetadataSyncV2CancelledTasks") + .setDescription("The number of cancelled metadata sync tasks") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_LOADS_FAILED = + new Builder("Master.MetadataSyncV2LoadsFailed") + .setDescription("The number of failed load requests during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_PROCESSING_FAILED = + new Builder("Master.MetadataSyncV2ProcessingFailed") + .setDescription("The number of loads that failed during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_CREATED = + new Builder("Master.MetadataSyncV2FilesCreated") + .setDescription("The number of files created during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_DELETED = + new Builder("Master.MetadataSyncV2FilesDeleted") + .setDescription("The number of files deleted during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_RECREATED = + new Builder("Master.MetadataSyncV2FilesRecreated") + .setDescription("The number of files recreated during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey 
MASTER_METADATA_SYNC_FILES_UPDATED = + new Builder("Master.MetadataSyncV2FilesUpdated") + .setDescription("The number of files updated during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_SKIPPED_CONCURRENT_UPDATE = + new Builder("Master.MetadataSyncV2FilesSkippedConcurrentUpdate") + .setDescription("The number of files skipped due to concurrent update " + + "during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_SKIPPED_MOUNT_POINT = + new Builder("Master.MetadataSyncV2FilesSkippedMountPoint") + .setDescription("The number of files skipped because the inode is a mount point " + + "during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_NOOP = + new Builder("Master.MetadataSyncV2FilesNoop") + .setDescription("The number of files at parity between Alluxio and the UFS " + + "during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_SKIPPED_NON_PERSISTED = + new Builder("Master.MetadataSyncV2FilesSkippedNonPersisted") + .setDescription("The number of files skipped because the " + + "inode is not persisted during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + // Metadata sync metrics public static final MetricKey MASTER_METADATA_SYNC_UFS_MOUNT = new Builder("Master.MetadataSyncUfsMount.") .setDescription("Total number of Mount operations") — wait .build(); // Journal metrics + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DURATION_MS = + new Builder("Master.EmbeddedJournalLastSnapshotDurationMs") + .setDescription("Describes the amount of time taken to generate the last local journal " + + "snapshot on this master. Only valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_ENTRIES_COUNT = + new Builder("Master.EmbeddedJournalLastSnapshotEntriesCount") + .setDescription("Describes the number of entries in the last local journal " + + "snapshot on this master. Only valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_GENERATE_TIMER = new Builder("Master.EmbeddedJournalSnapshotGenerateTimer") .setDescription("Describes the amount of time taken to generate local journal snapshots" @@ -852,11 +995,84 @@ public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_TIMER = new Builder("Master.EmbeddedJournalSnapshotDownloadGenerate") .setDescription("Describes the amount of time taken to download journal snapshots from " - + "other masters in the cluster. Only valid when using the embedded journal. Use " - + "this metric to determine if there are potential communication bottlenecks " - + "between Alluxio masters.") + + "other masters in the cluster. Only valid when using the embedded journal. 
Long " + + "running average.") .setMetricType(MetricType.TIMER) .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DOWNLOAD_DURATION_MS = + new Builder("Master.EmbeddedJournalLastSnapshotDownloadDurationMs") + .setDescription("Describes the amount of time taken to download journal snapshots from " + + "other masters in the cluster the previous time the download occurred. Only " + + "valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_HISTOGRAM = + new Builder("Master.EmbeddedJournalSnapshotDownloadHistogram") + .setDescription("Describes the size of the snapshot downloaded from another master in " + + "the cluster. Only valid when using the embedded journal. Long running average.") + .setMetricType(MetricType.HISTOGRAM) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DOWNLOAD_SIZE = + new Builder("Master.EmbeddedJournalLastSnapshotDownloadSize") + .setDescription("Describes the size of the snapshot downloaded from " + + "other masters in the cluster the previous time the download occurred. Only " + + "valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_DISK_HISTOGRAM = + new Builder("Master.EmbeddedJournalSnapshotDownloadDiskHistogram") + .setDescription("Describes the size on disk of the snapshot downloaded from another " + + "master in the cluster. Only valid when using the embedded journal. " + + "Long running average.") + .setMetricType(MetricType.HISTOGRAM) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DOWNLOAD_DISK_SIZE = + new Builder("Master.EmbeddedJournalLastSnapshotDownloadDiskSize") + .setDescription("Describes the size on disk of the snapshot downloaded from " + + "other masters in the cluster the previous time the download occurred. Only " + + "valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_UPLOAD_HISTOGRAM = + new Builder("Master.EmbeddedJournalSnapshotUploadHistogram") + .setDescription("Describes the size of the snapshot uploaded to another master in " + + "the cluster. Only valid when using the embedded journal. Long running average.") + .setMetricType(MetricType.HISTOGRAM) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_UPLOAD_SIZE = + new Builder("Master.EmbeddedJournalLastSnapshotUploadSize") + .setDescription("Describes the size of the snapshot uploaded to " + + "other masters in the cluster the previous time the download occurred. Only " + + "valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_UPLOAD_DISK_HISTOGRAM = + new Builder("Master.EmbeddedJournalSnapshotUploadDiskHistogram") + .setDescription("Describes the size on disk of the snapshot uploaded to another master " + + "in the cluster. Only valid when using the embedded journal. Long running average.") + .setMetricType(MetricType.HISTOGRAM) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_UPLOAD_DISK_SIZE = + new Builder("Master.EmbeddedJournalLastSnapshotUploadDiskSize") + .setDescription("Describes the size on disk of the snapshot uploaded to " + + "other masters in the cluster the previous time the download occurred. 
Only " + + "valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_UPLOAD_TIMER = + new Builder("Master.EmbeddedJournalSnapshotUploadTimer") + .setDescription("Describes the amount of time taken to upload journal snapshots to " + + "another master in the cluster. Only valid when using the embedded journal. long " + + "running average") + .setMetricType(MetricType.TIMER) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_UPLOAD_DURATION_MS = + new Builder("Master.EmbeddedJournalLastSnapshotUploadDurationMs") + .setDescription("Describes the amount of time taken to upload journal snapshots to " + + "another master in the cluster the previous time the upload occurred. Only " + + "valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_INSTALL_TIMER = new Builder("Master.EmbeddedJournalSnapshotInstallTimer") .setDescription("Describes the amount of time taken to install a downloaded journal " @@ -874,6 +1090,18 @@ public static String getSyncMetricName(long mountId) { + "snapshot file. Higher numbers may indicate a slow disk or CPU contention") .setMetricType(MetricType.TIMER) .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_REPLAY_DURATION_MS = + new Builder("Master.EmbeddedJournalLastSnapshotReplayDurationMs") + .setDescription("Represents the time the last restore from checkpoint operation took in" + + " milliseconds.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_REPLAY_ENTRIES_COUNT = + new Builder("Master.EmbeddedJournalLastSnapshotReplayEntriesCount") + .setDescription("Represents the time the last restore from checkpoint operation took in" + + " milliseconds.") + .setMetricType(MetricType.GAUGE) + .build(); public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_LAST_INDEX = new Builder("Master.EmbeddedJournalSnapshotLastIndex") .setDescription("Represents the latest journal index that was recorded by this master " @@ -886,6 +1114,21 @@ public static String getSyncMetricName(long mountId) { .setDescription("Display master role id") .setMetricType(MetricType.GAUGE) .build(); + public static final MetricKey MASTER_START_TIME = + new Builder("Master.StartTime") + .setDescription("The start time of the master process") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_LAST_GAIN_PRIMACY_TIME = + new Builder("Master.LastGainPrimacyTime") + .setDescription("Last time the master gains primacy") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_LAST_LOSE_PRIMACY_TIME = + new Builder("Master.LastLosePrimacyTime") + .setDescription("Last time the master loses primacy") + .setMetricType(MetricType.GAUGE) + .build(); public static final MetricKey MASTER_JOURNAL_FLUSH_FAILURE = new Builder("Master.JournalFlushFailure") .setDescription("Total number of failed journal flush") @@ -983,7 +1226,38 @@ public static String getSyncMetricName(long mountId) { .setDescription("The number of running status job") .setMetricType(MetricType.COUNTER) .build(); - +// new job metrics + public static final MetricKey MASTER_JOB_LOAD_SUCCESS = + new Builder("Master.JobLoadSuccess") + .setDescription("The number of successful Load commands") + .setMetricType(MetricType.COUNTER) + .build(); + public static final MetricKey 
MASTER_JOB_LOAD_FAIL = + new Builder("Master.JobLoadFail") + .setDescription("The number of failed Load commands") + .setMetricType(MetricType.COUNTER) + .build(); + public static final MetricKey MASTER_JOB_LOAD_BLOCK_COUNT = + new Builder("Master.JobLoadBlockCount") + .setDescription("The number of blocks loaded by load commands") + .setMetricType(MetricType.COUNTER) + .build(); + public static final MetricKey MASTER_JOB_LOAD_BLOCK_FAIL = + new Builder("Master.JobLoadBlockFail") + .setDescription("The number of blocks that failed to be loaded by load commands") + .setMetricType(MetricType.COUNTER) + .build(); + public static final MetricKey MASTER_JOB_LOAD_BLOCK_SIZE = + new Builder("Master.JobDistributedLoadBlockSizes") + .setDescription("The total block size loaded by load commands") + .setMetricType(MetricType.COUNTER) + .build(); + public static final MetricKey MASTER_JOB_LOAD_RATE = + new Builder("Master.JobLoadRate") + .setDescription("The average loading rate of Load commands") + .setMetricType(MetricType.METER) + .setIsClusterAggregated(true) + .build(); // Distributed command related metrics public static final MetricKey MASTER_JOB_DISTRIBUTED_LOAD_SUCCESS = new Builder("Master.JobDistributedLoadSuccess") @@ -1437,6 +1711,13 @@ .setDescription("Bytes read per minute throughput from all Alluxio UFSes by all workers") .setMetricType(MetricType.GAUGE) .build(); + + public static final MetricKey CLUSTER_BYTES_READ_CACHE = + new Builder("Cluster.BytesReadCache") + .setDescription("Total number of bytes read from all workers' caches") + .setMetricType(MetricType.COUNTER) + .build(); + public static final MetricKey CLUSTER_BYTES_WRITTEN_REMOTE = new Builder("Cluster.BytesWrittenRemote") .setDescription("Total number of bytes written to workers via network (RPC). " @@ -1583,12 +1864,24 @@ .setMetricType(MetricType.COUNTER) .setIsClusterAggregated(true) .build(); + /* + * This metric is inaccurate because it is updated in so many places. + * Given time, it should be deprecated and replaced by WORKER_ACTIVE_OPERATIONS. + */ + @Deprecated public static final MetricKey WORKER_ACTIVE_CLIENTS = new Builder("Worker.ActiveClients") .setDescription("The number of clients actively reading from or writing to this worker") .setMetricType(MetricType.COUNTER) .setIsClusterAggregated(true) .build(); + public static final MetricKey WORKER_ACTIVE_OPERATIONS = + new Builder("Worker.ActiveOperations") + .setDescription("The number of active RPCs in the worker, including control RPCs " + + "and data I/O. 
Used to tell if the worker is idle or busy.") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); public static final MetricKey WORKER_BLOCKS_ACCESSED = new Builder("Worker.BlocksAccessed") .setDescription("Total number of times any one of the blocks in this worker is accessed.") @@ -1717,6 +2010,14 @@ .setMetricType(MetricType.METER) .setIsClusterAggregated(false) .build(); + + public static final MetricKey WORKER_BYTES_READ_CACHE = + new Builder("Worker.BytesReadCache") + .setDescription("Total number of bytes read from the worker's cache") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(true) + .build(); + public static final MetricKey WORKER_BYTES_WRITTEN_DIRECT = new Builder("Worker.BytesWrittenDirect") .setDescription("Total number of bytes written to this worker " @@ -1833,6 +2134,12 @@ .setMetricType(MetricType.COUNTER) .setIsClusterAggregated(false) .build(); + public static final MetricKey WORKER_CACHE_REJECTED_BLOCKS = + new Builder("Worker.CacheRejectedBlocks") + .setDescription("Total number of rejected cache block requests on the worker") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); public static final MetricKey WORKER_CACHE_UFS_BLOCKS = new Builder("Worker.CacheUfsBlocks") .setDescription("Total number of blocks that need to be cached from local source") .setMetricType(MetricType.COUNTER) .setIsClusterAggregated(false) .build(); @@ -2008,6 +2315,11 @@ + "Use this metric to monitor the RPC pressure on worker.") .setMetricType(MetricType.GAUGE) .build(); + public static final MetricKey WORKER_MASTER_REGISTRATION_SUCCESS_COUNT = + new Builder("Worker.MasterRegistrationSuccessCount") + .setDescription("Total number of successful master registrations.") + .setMetricType(MetricType.COUNTER) + .build(); // Client metrics public static final MetricKey CLIENT_BLOCK_READ_CHUNK_REMOTE = diff --git a/core/common/src/main/java/alluxio/metrics/MetricsSystem.java b/core/common/src/main/java/alluxio/metrics/MetricsSystem.java index 5b9440bb4cce..7d141c82bb93 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricsSystem.java +++ b/core/common/src/main/java/alluxio/metrics/MetricsSystem.java @@ -24,8 +24,10 @@ import com.codahale.metrics.CachedGauge; import com.codahale.metrics.Counter; import com.codahale.metrics.Gauge; +import com.codahale.metrics.Histogram; import com.codahale.metrics.Meter; import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.SlidingTimeWindowMovingAverages; import com.codahale.metrics.Timer; import com.codahale.metrics.UniformReservoir; import com.codahale.metrics.jvm.CachedThreadStatesGaugeSet; @@ -40,6 +42,7 @@ import org.slf4j.LoggerFactory; import java.lang.management.BufferPoolMXBean; +import java.lang.management.ManagementFactory; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -91,6 +94,7 @@ public final class MetricsSystem { CommonUtils.memoize(() -> constructSourceName()); private static final Map EXECUTOR_SERVICES = new ConcurrentHashMap<>(); + private static final int SECONDS_IN_A_MINUTE = 60; /** * An enum of supported instance type. 
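Two usage notes on the metrics changes, as sketches. First, the new keys are ordinary Codahale metrics, so the worker activity counter added above can be bumped and inspected like this:

Counter activeOps = MetricsSystem.counter(MetricKey.WORKER_ACTIVE_OPERATIONS.getName());
activeOps.inc(); // on RPC entry
activeOps.dec(); // on RPC exit
boolean idle = activeOps.getCount() == 0;

Second, the meter changes below switch to SlidingTimeWindowMovingAverages, whose one-minute "rate" is an event count over the sliding window rather than a per-second rate; that is why the reporting code divides by SECONDS_IN_A_MINUTE:

Meter bytesRead = MetricsSystem.meter(MetricKey.WORKER_BYTES_READ_CACHE.getName());
bytesRead.mark(4096);
double bytesPerSecond = bytesRead.getOneMinuteRate() / 60.0; // per-minute count -> per-second rate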
@@ -163,7 +167,7 @@ public static InstanceType fromString(String text) { private static BufferPoolMXBean getDirectBufferPool() { for (BufferPoolMXBean bufferPoolMXBean - : sun.management.ManagementFactoryHelper.getBufferPoolMXBeans()) { + : ManagementFactory.getPlatformMXBeans(BufferPoolMXBean.class)) { if (bufferPoolMXBean.getName().equals("direct")) { return bufferPoolMXBean; } @@ -591,7 +595,8 @@ public static Counter counterWithTags(String name, boolean shouldReport, String. * @return a meter object with the qualified metric name */ public static Meter meter(String name) { - return METRIC_REGISTRY.meter(getMetricName(name)); + return METRIC_REGISTRY.meter(getMetricName(name), + () -> new Meter(new SlidingTimeWindowMovingAverages())); } /** @@ -641,6 +646,16 @@ public static Timer uniformTimer(String name) { }); } + /** + * Get or add a histogram with the given name. + * + * @param name the name of the metric + * @return a histogram object with the qualified metric name + */ + public static Histogram histogram(String name) { + return METRIC_REGISTRY.histogram(getMetricName(name)); + } + /** * Registers a gauge if it has not been registered. * @@ -786,7 +801,7 @@ private static synchronized List reportMetrics(InstanceType // that a value marked. For clients, especially short-life clients, // the minute rates will be zero for their whole life. // That's why all throughput meters are not aggregated at cluster level. - rpcMetrics.add(Metric.from(entry.getKey(), meter.getOneMinuteRate(), + rpcMetrics.add(Metric.from(entry.getKey(), meter.getOneMinuteRate() / SECONDS_IN_A_MINUTE, MetricType.METER).toProto()); } else if (metric instanceof Timer) { Timer timer = (Timer) metric; @@ -871,7 +886,7 @@ private static Metric getAlluxioMetricFromCodahaleMetric(String name, return Metric.from(name, counter.getCount(), MetricType.COUNTER); } else if (metric instanceof Meter) { Meter meter = (Meter) metric; - return Metric.from(name, meter.getOneMinuteRate(), MetricType.METER); + return Metric.from(name, meter.getOneMinuteRate() / SECONDS_IN_A_MINUTE, MetricType.METER); } else if (metric instanceof Timer) { Timer timer = (Timer) metric; return Metric.from(name, timer.getCount(), MetricType.TIMER); @@ -903,7 +918,7 @@ public static Map allMetrics() { .setDoubleValue(((Counter) metric).getCount()); } else if (metric instanceof Meter) { valueBuilder.setMetricType(MetricType.METER) - .setDoubleValue(((Meter) metric).getOneMinuteRate()); + .setDoubleValue(((Meter) metric).getOneMinuteRate() / SECONDS_IN_A_MINUTE); } else if (metric instanceof Timer) { valueBuilder.setMetricType(MetricType.TIMER) .setDoubleValue(((Timer) metric).getCount()); diff --git a/core/common/src/main/java/alluxio/network/RejectingServer.java b/core/common/src/main/java/alluxio/network/RejectingServer.java index 23b39ccbd943..8d922e9cf7d3 100644 --- a/core/common/src/main/java/alluxio/network/RejectingServer.java +++ b/core/common/src/main/java/alluxio/network/RejectingServer.java @@ -12,7 +12,9 @@ package alluxio.network; import alluxio.Constants; +import alluxio.util.CommonUtils; +import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,13 +33,24 @@ public final class RejectingServer extends Thread { private final InetSocketAddress mAddress; private ServerSocket mServerSocket; + private final long mSleepTime; /** * @param address the socket address to reject requests on */ public RejectingServer(InetSocketAddress address) { + this(address, 0); + } + + /** + * @param 
address the socket address to reject requests on + * @param sleepTime the time in milliseconds to sleep before closing an accepted connection + */ + @VisibleForTesting + public RejectingServer(InetSocketAddress address, long sleepTime) { super("RejectingServer-" + address); mAddress = address; + mSleepTime = sleepTime; } @Override @@ -52,6 +65,9 @@ public void run() { while (!Thread.interrupted()) { try { Socket s = mServerSocket.accept(); + if (mSleepTime > 0) { + CommonUtils.sleepMs(mSleepTime); + } s.close(); } catch (SocketException e) { return; diff --git a/core/common/src/main/java/alluxio/network/protocol/databuffer/NioDirectBufferPool.java b/core/common/src/main/java/alluxio/network/protocol/databuffer/NioDirectBufferPool.java index 8a031fa97048..49d7dd99ab1c 100644 --- a/core/common/src/main/java/alluxio/network/protocol/databuffer/NioDirectBufferPool.java +++ b/core/common/src/main/java/alluxio/network/protocol/databuffer/NioDirectBufferPool.java @@ -11,6 +11,9 @@ package alluxio.network.protocol.databuffer; +import alluxio.exception.runtime.ResourceExhaustedRuntimeException; +import alluxio.retry.RetryPolicy; + import java.nio.ByteBuffer; import java.util.LinkedList; import java.util.Map; @@ -39,6 +42,24 @@ public static synchronized ByteBuffer acquire(int length) { return buffer; } + /** + * @param length the capacity in bytes of the buffer to acquire + * @param policy the retry policy to use + * @return the acquired buffer + */ + public static synchronized ByteBuffer acquire(int length, RetryPolicy policy) { + Error cause = null; + while (policy.attempt()) { + try { + return acquire(length); + } catch (OutOfMemoryError error) { + cause = error; + } + } + throw new ResourceExhaustedRuntimeException("Not enough direct memory allocated to buffer", + cause, false); + } + /** * @param buffer */ diff --git a/core/common/src/main/java/alluxio/network/protocol/databuffer/RefCountedNioByteBuf.java b/core/common/src/main/java/alluxio/network/protocol/databuffer/RefCountedNioByteBuf.java index 1ff14a53fab9..58536d6ea07b 100644 --- a/core/common/src/main/java/alluxio/network/protocol/databuffer/RefCountedNioByteBuf.java +++ b/core/common/src/main/java/alluxio/network/protocol/databuffer/RefCountedNioByteBuf.java @@ -281,7 +281,9 @@ public int getBytes(int index, FileChannel out, long position, int length) throw @Override public ByteBuf setBytes(int index, ByteBuf src, int srcIndex, int length) { ensureIndexInBounds(srcIndex, src.capacity(), index, capacity(), length); - src.getBytes(srcIndex, this, index, length); + ByteBuffer dup = mDelegate.duplicate(); + dup.position(index).limit(index + length); + src.getBytes(srcIndex, dup); return this; } diff --git a/core/common/src/main/java/alluxio/resource/CloseableIterator.java b/core/common/src/main/java/alluxio/resource/CloseableIterator.java index b6f473ad44fd..c2ba6138e77f 100644 --- a/core/common/src/main/java/alluxio/resource/CloseableIterator.java +++ b/core/common/src/main/java/alluxio/resource/CloseableIterator.java @@ -43,7 +43,7 @@ public abstract class CloseableIterator extends CloseableResource * * @param iterator the resource to wrap */ - CloseableIterator(Iterator iterator) { + protected CloseableIterator(Iterator iterator) { super(iterator); mIter = iterator; } diff --git a/core/common/src/main/java/alluxio/resource/DynamicResourcePool.java b/core/common/src/main/java/alluxio/resource/DynamicResourcePool.java index 7b86f66027cf..abb785e55f74 100644 --- a/core/common/src/main/java/alluxio/resource/DynamicResourcePool.java +++ b/core/common/src/main/java/alluxio/resource/DynamicResourcePool.java @@ -46,6 +46,16 @@ */
@ThreadSafe public abstract class DynamicResourcePool implements Pool { + /** + * A policy specifying in what order to pick a resource item from a pool. + */ + public enum SelectionPolicy { + // first-in-first-out, reuse the coldest (least recently returned) resource + FIFO, + // last-in-first-out, reuse the hottest (most recently returned) resource + LIFO, + } + private static final Logger LOG = LoggerFactory.getLogger(DynamicResourcePool.class); /** @@ -110,7 +120,7 @@ public static final class Options { * If set to false, the first returned resource will take priority. * {@link #acquire()} tends to reuse the most fresh resource if possible. */ - private boolean mFIFO = false; + private SelectionPolicy mSelectionPolicy = SelectionPolicy.LIFO; /** * @return the max capacity @@ -148,18 +158,18 @@ public ScheduledExecutorService getGcExecutor() { } /** - * @return if resources are returned in a FIFO manner + * @return the selection policy */ - public boolean getFIFO() { - return mFIFO; + public SelectionPolicy getSelectionPolicy() { + return mSelectionPolicy; } /** - * @param fifo if resources should be returned in a FIFO manner + * @param policy how to select a resource from the pool * @return the updated object */ - public Options setFIFO(boolean fifo) { - mFIFO = fifo; + public Options setSelectionPolicy(SelectionPolicy policy) { + mSelectionPolicy = policy; return this; } @@ -233,12 +243,9 @@ public static Options defaultOptions() { private final int mMinCapacity; /** - * If set to true, when a resource needs to be taken from the pool, the last returned resource - * will take priority. {@link #acquire()} tends to return a different object every time. - * If set to false, the first returned resource will take priority. - * {@link #acquire()} tends to reuse the most fresh resource if possible. + * The selection policy of the resource pool. See {@link SelectionPolicy} for details. */ - private final boolean mFIFO; + protected final SelectionPolicy mSelectionPolicy; // Tracks the resources that are available ordered by lastAccessTime (the head is // the most recently used resource). @@ -251,7 +258,7 @@ public static Options defaultOptions() { // put/delete operations are guarded by "mLock" so that we can control its size to be within // a [min, max] range. mLock is reused for simplicity. A separate lock can be used if we see // any performance overhead.
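Because the available deque keeps the most recently returned resource at its head, the two policies simply read from opposite ends of it; a toy illustration of the pollFirst/pollLast pairing used by poll() further down (class name hypothetical):

import java.util.ArrayDeque;

public class SelectionPolicySketch {
  public static void main(String[] args) {
    // Head of the deque models the most recently returned resource.
    ArrayDeque<String> available = new ArrayDeque<>();
    available.addFirst("cold");  // returned a while ago, drifts toward the tail
    available.addFirst("fresh"); // returned just now, sits at the head
    System.out.println(available.pollFirst()); // LIFO pick: "fresh"
    System.out.println(available.pollLast());  // FIFO pick: "cold"
  }
}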
- private final ConcurrentHashMap> mResources = + protected final ConcurrentHashMap> mResources = new ConcurrentHashMap<>(32); private final Counter mCounter; @@ -272,7 +279,7 @@ public DynamicResourcePool(Options options) { "cannot find resource count metric for %s", getClass().getName()); mMaxCapacity = options.getMaxCapacity(); mMinCapacity = options.getMinCapacity(); - mFIFO = options.getFIFO(); + mSelectionPolicy = options.getSelectionPolicy(); mAvailableResources = new ArrayDeque<>(Math.min(mMaxCapacity, 32)); mGcFuture = mExecutor.scheduleAtFixedRate(() -> { List resourcesToGc = new ArrayList<>(); @@ -494,10 +501,15 @@ private void remove(T resource) { private ResourceInternal poll() { try { mLock.lock(); - if (mFIFO) { - return mAvailableResources.pollLast(); + switch (mSelectionPolicy) { + case FIFO: + return mAvailableResources.pollLast(); + case LIFO: + return mAvailableResources.pollFirst(); + default: + throw new UnsupportedOperationException( + "Policy " + mSelectionPolicy + " is not supported!"); } - return mAvailableResources.pollFirst(); } finally { mLock.unlock(); } diff --git a/core/common/src/main/java/alluxio/resource/NoopCloseable.java b/core/common/src/main/java/alluxio/resource/NoopCloseable.java new file mode 100644 index 000000000000..4331588312e1 --- /dev/null +++ b/core/common/src/main/java/alluxio/resource/NoopCloseable.java @@ -0,0 +1,29 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.resource; + +import java.io.Closeable; +import java.io.IOException; + +/** + * A noop closeable that does nothing upon close. + */ +public class NoopCloseable implements Closeable { + private NoopCloseable() { + } + + @Override + public void close() throws IOException { + } + + public static final NoopCloseable INSTANCE = new NoopCloseable(); +} diff --git a/core/common/src/main/java/alluxio/retry/RetryUtils.java b/core/common/src/main/java/alluxio/retry/RetryUtils.java index f07530aec288..7dce3d8891c3 100644 --- a/core/common/src/main/java/alluxio/retry/RetryUtils.java +++ b/core/common/src/main/java/alluxio/retry/RetryUtils.java @@ -30,7 +30,8 @@ public final class RetryUtils { /** * Retries the given method until it doesn't throw an IO exception or the retry policy expires. If - * the retry policy expires, the last exception generated will be rethrown. + * the retry policy expires, the last exception generated will be rethrown. If the policy expires + * before any attempt is made, a generic IOException is thrown instead.
* * @param action a description of the action that fits the phrase "Failed to ${action}" * @param f the function to retry @@ -45,10 +46,14 @@ public static void retry(String action, RunnableThrowsIOException f, RetryPolicy return; } catch (IOException ioe) { e = ioe; - LOG.warn("Failed to {} (attempt {}): {}", action, policy.getAttemptCount(), e.toString()); + LOG.debug("Failed to {} (attempt {}): {}", action, policy.getAttemptCount(), e.toString()); } } - throw e; + if (e != null) { + throw e; + } + throw new IOException(String.format("Failed to run action %s after %d attempts", + action, policy.getAttemptCount())); } /** diff --git a/core/common/src/main/java/alluxio/retry/SleepingRetry.java b/core/common/src/main/java/alluxio/retry/SleepingRetry.java index 31eed7e0a91b..ec03d07dbedf 100644 --- a/core/common/src/main/java/alluxio/retry/SleepingRetry.java +++ b/core/common/src/main/java/alluxio/retry/SleepingRetry.java @@ -25,7 +25,7 @@ public abstract class SleepingRetry implements RetryPolicy { private int mAttemptCount = 0; protected SleepingRetry(int maxRetries) { - Preconditions.checkArgument(maxRetries > 0, "Max retries must be a positive number"); + Preconditions.checkArgument(maxRetries >= 0, "Max retries must be a non-negative number"); mMaxRetries = maxRetries; } diff --git a/core/common/src/main/java/alluxio/security/authentication/ClientIpAddressInjector.java b/core/common/src/main/java/alluxio/security/authentication/ClientContextServerInjector.java similarity index 65% rename from core/common/src/main/java/alluxio/security/authentication/ClientIpAddressInjector.java rename to core/common/src/main/java/alluxio/security/authentication/ClientContextServerInjector.java index 07ff7374ef7c..ab9de9ebed2f 100644 --- a/core/common/src/main/java/alluxio/security/authentication/ClientIpAddressInjector.java +++ b/core/common/src/main/java/alluxio/security/authentication/ClientContextServerInjector.java @@ -11,6 +11,8 @@ package alluxio.security.authentication; +import alluxio.grpc.ClientVersionClientInjector; + import io.grpc.ForwardingServerCallListener; import io.grpc.Grpc; import io.grpc.Metadata; @@ -18,32 +20,50 @@ import io.grpc.ServerCallHandler; import io.grpc.ServerInterceptor; +import javax.annotation.Nullable; + /** * Server side interceptor that is used to put remote client's IP Address to thread local storage. */ -public class ClientIpAddressInjector implements ServerInterceptor { +public class ClientContextServerInjector implements ServerInterceptor { /** * A {@link ThreadLocal} variable to maintain the client's IP address along with a specific * thread. */ - private static ThreadLocal sIpAddressThreadLocal = new ThreadLocal<>(); + private static final ThreadLocal IP_ADDRESS_THREAD_LOCAL = new ThreadLocal<>(); + /** + * A {@link ThreadLocal} variable to maintain the client's version along with a specific + * thread. + */ + private static final ThreadLocal CLIENT_VERSION_THREAD_LOCAL = + new ThreadLocal<>(); /** * @return IP address of the gRPC client that is making the call */ + @Nullable public static String getIpAddress() { - return sIpAddressThreadLocal.get(); + return IP_ADDRESS_THREAD_LOCAL.get(); + } + + /** + * @return the client version + */ + @Nullable + public static String getClientVersion() { + return CLIENT_VERSION_THREAD_LOCAL.get(); } @Override public ServerCall.Listener interceptCall(ServerCall call, Metadata headers, ServerCallHandler next) { /** - * For streaming calls, below will make sure remote IP address is injected prior to creating the - * stream. 
+ * For streaming calls, below will make sure remote IP address and client version are + * injected prior to creating the stream. */ setRemoteIpAddress(call); + setClientVersion(headers); /** * For non-streaming calls to server, below listener will be invoked in the same thread that is @@ -54,6 +74,7 @@ public ServerCall.Listener interceptCall(ServerCall void setRemoteIpAddress(ServerCall call) { String remoteIpAddress = call.getAttributes().get(Grpc.TRANSPORT_ATTR_REMOTE_ADDR).toString(); - sIpAddressThreadLocal.set(remoteIpAddress); + IP_ADDRESS_THREAD_LOCAL.set(remoteIpAddress); + } + + private void setClientVersion(Metadata headers) { + String version = headers.get(ClientVersionClientInjector.S_CLIENT_VERSION_KEY); + CLIENT_VERSION_THREAD_LOCAL.set(version); } } diff --git a/core/common/src/main/java/alluxio/time/Sleeper.java b/core/common/src/main/java/alluxio/time/Sleeper.java index cc972c7cd97e..27db10a8c14a 100644 --- a/core/common/src/main/java/alluxio/time/Sleeper.java +++ b/core/common/src/main/java/alluxio/time/Sleeper.java @@ -12,6 +12,7 @@ package alluxio.time; import java.time.Duration; +import java.util.function.Supplier; /** * An interface for a utility which provides a sleep method. @@ -25,4 +26,14 @@ public interface Sleeper { * @throws InterruptedException if the sleep is interrupted */ void sleep(Duration duration) throws InterruptedException; + + /** + * Sleeps for the given duration, periodically waking up to re-read the duration from the + * supplier and adjust the remaining sleep time. + * @param durationSupplier the supplier of the current sleep duration + * @throws InterruptedException if the sleep is interrupted + */ + default void sleep(Supplier durationSupplier) + throws InterruptedException { + sleep(durationSupplier.get()); + } } diff --git a/core/common/src/main/java/alluxio/time/SteppingThreadSleeper.java b/core/common/src/main/java/alluxio/time/SteppingThreadSleeper.java new file mode 100644 index 000000000000..c1e8800f81ef --- /dev/null +++ b/core/common/src/main/java/alluxio/time/SteppingThreadSleeper.java @@ -0,0 +1,88 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.time; + +import alluxio.Constants; + +import com.google.common.annotations.VisibleForTesting; + +import java.time.Clock; +import java.time.Duration; +import java.util.function.Supplier; + +/** + * A progressive sleeper that wakes up multiple times during sleep to check if the requested sleep + * duration has changed, and adjusts its sleep duration accordingly. + */ +public final class SteppingThreadSleeper implements Sleeper { + private long mSleepStepMs = Constants.MINUTE; + + public static final SteppingThreadSleeper INSTANCE = new SteppingThreadSleeper(); + + private final Sleeper mInternalSleeper; + private final Clock mClock; + + private SteppingThreadSleeper() { + mInternalSleeper = ThreadSleeper.INSTANCE; + mClock = Clock.systemUTC(); + } + + /** + * Creates a new instance of {@link SteppingThreadSleeper}.
+ * @param internalSleeper the internal sleeper + * @param clock for telling the current time + */ + @VisibleForTesting + public SteppingThreadSleeper(Sleeper internalSleeper, Clock clock) { + mInternalSleeper = internalSleeper; + mClock = clock; + } + + @Override + public void sleep(Duration duration) throws InterruptedException { + mInternalSleeper.sleep(duration); + } + + @Override + public void sleep(Supplier durationSupplier) throws InterruptedException { + Duration duration = durationSupplier.get(); + if (duration.toMillis() < 0) { + return; + } + if (duration.toMillis() < mSleepStepMs) { + sleep(duration); + return; + } + long startSleepMs = mClock.millis(); + long sleepTo = startSleepMs + duration.toMillis(); + long timeNow; + while ((timeNow = mClock.millis()) < sleepTo) { + long sleepTime = Math.min(sleepTo - timeNow, mSleepStepMs); + mInternalSleeper.sleep(Duration.ofMillis(sleepTime)); + + long newInterval = durationSupplier.get().toMillis(); + if (newInterval >= 0) { + sleepTo = startSleepMs + newInterval; + } + } + } + + /** + * Sets the sleep step. + * + * @param sleepStepMs the sleep step + */ + @VisibleForTesting + public void setSleepStepMs(long sleepStepMs) { + mSleepStepMs = sleepStepMs; + } +} diff --git a/core/common/src/main/java/alluxio/underfs/AtomicFileOutputStream.java b/core/common/src/main/java/alluxio/underfs/AtomicFileOutputStream.java index e46967d1cff5..141317ccb663 100644 --- a/core/common/src/main/java/alluxio/underfs/AtomicFileOutputStream.java +++ b/core/common/src/main/java/alluxio/underfs/AtomicFileOutputStream.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; +import java.util.Optional; import javax.annotation.concurrent.NotThreadSafe; /** @@ -28,7 +29,7 @@ * that writing to the stream is atomic, i.e., all writes become readable only after a close. */ @NotThreadSafe -public class AtomicFileOutputStream extends OutputStream { +public class AtomicFileOutputStream extends OutputStream implements ContentHashable { private static final Logger LOG = LoggerFactory.getLogger(AtomicFileOutputStream.class); private AtomicFileOutputStreamCallback mUfs; @@ -95,5 +96,15 @@ public void close() throws IOException { // TODO(chaomin): consider setMode of the ufs file. 
mClosed = true; } + + @Override + public Optional getContentHash() throws IOException { + // get the content hash immediately after the file has completed writing + // which will be used for generating the fingerprint of the file in Alluxio + // ideally this value would be received as a result from the close call + // so that we would be sure to have the hash relating to the file uploaded + // (but such an API is not available for the UFSs that use this stream type) + return Optional.of(mUfs.getFileStatus(mPermanentPath).getContentHash()); + } } diff --git a/core/common/src/main/java/alluxio/underfs/BaseUnderFileSystem.java b/core/common/src/main/java/alluxio/underfs/BaseUnderFileSystem.java index 6be963b77103..a33135db1a07 100644 --- a/core/common/src/main/java/alluxio/underfs/BaseUnderFileSystem.java +++ b/core/common/src/main/java/alluxio/underfs/BaseUnderFileSystem.java @@ -16,6 +16,8 @@ import alluxio.SyncInfo; import alluxio.collections.Pair; import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.file.options.DescendantType; import alluxio.security.authorization.AccessControlList; import alluxio.security.authorization.AclEntry; import alluxio.security.authorization.DefaultAccessControlList; @@ -24,21 +26,32 @@ import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; +import alluxio.util.RateLimiter; +import alluxio.util.ThreadFactoryUtils; import alluxio.util.io.PathUtils; import com.google.common.base.Preconditions; +import com.google.common.collect.Iterators; +import com.google.common.io.Closer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Queue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.function.Consumer; +import java.util.stream.Stream; import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; @@ -46,7 +59,7 @@ * A base abstract {@link UnderFileSystem}. */ @ThreadSafe -public abstract class BaseUnderFileSystem implements UnderFileSystem { +public abstract class BaseUnderFileSystem implements UnderFileSystem, UfsClient { private static final Logger LOG = LoggerFactory.getLogger(BaseUnderFileSystem.class); public static final Pair EMPTY_ACL = new Pair<>(null, null); @@ -57,6 +70,10 @@ public abstract class BaseUnderFileSystem implements UnderFileSystem { /** UFS Configuration options. */ protected final UnderFileSystemConfiguration mUfsConf; + private final ExecutorService mAsyncIOExecutor; + + private final RateLimiter mRateLimiter; + /** * Constructs an {@link BaseUnderFileSystem}. * @@ -66,6 +83,27 @@ public abstract class BaseUnderFileSystem implements UnderFileSystem { protected BaseUnderFileSystem(AlluxioURI uri, UnderFileSystemConfiguration ufsConf) { mUri = Preconditions.checkNotNull(uri, "uri"); mUfsConf = Preconditions.checkNotNull(ufsConf, "ufsConf"); + mAsyncIOExecutor = Executors.newCachedThreadPool( + ThreadFactoryUtils.build(uri.getPath() + "IOThread", true)); + long rateLimit = mUfsConf.isSet(PropertyKey.MASTER_METADATA_SYNC_UFS_RATE_LIMIT) + ? 
mUfsConf.getLong(PropertyKey.MASTER_METADATA_SYNC_UFS_RATE_LIMIT) : 0; + mRateLimiter = RateLimiter.createRateLimiter(rateLimit); + } + + @Override + public void close() throws IOException { + try (Closer closer = Closer.create()) { + closer.register(() -> { + if (mAsyncIOExecutor != null) { + mAsyncIOExecutor.shutdown(); + } + }); + } + } + + @Override + public RateLimiter getRateLimiter() { + return mRateLimiter; } @Override @@ -109,7 +147,7 @@ public String getFingerprint(String path) { if (aclPair == null || aclPair.getFirst() == null || !aclPair.getFirst().hasExtended()) { return Fingerprint.create(getUnderFSType(), status).serialize(); } else { - return Fingerprint.create(getUnderFSType(), status, aclPair.getFirst()).serialize(); + return Fingerprint.create(getUnderFSType(), status, null, aclPair.getFirst()).serialize(); } } catch (Exception e) { // In certain scenarios, it is expected that the UFS path does not exist. @@ -120,14 +158,19 @@ public String getFingerprint(String path) { @Override public Fingerprint getParsedFingerprint(String path) { + return getParsedFingerprint(path, null); + } + + @Override + public Fingerprint getParsedFingerprint(String path, @Nullable String contentHash) { try { UfsStatus status = getStatus(path); Pair aclPair = getAclPair(path); if (aclPair == null || aclPair.getFirst() == null || !aclPair.getFirst().hasExtended()) { - return Fingerprint.create(getUnderFSType(), status); + return Fingerprint.create(getUnderFSType(), status, contentHash); } else { - return Fingerprint.create(getUnderFSType(), status, aclPair.getFirst()); + return Fingerprint.create(getUnderFSType(), status, contentHash, aclPair.getFirst()); } } catch (IOException e) { return Fingerprint.INVALID_FINGERPRINT; @@ -158,6 +201,94 @@ public boolean isSeekable() { return false; } + @Nullable + @Override + public Iterator listStatusIterable( + String path, ListOptions options, String startAfter, int batchSize) throws IOException { + // Calling this method on a non-S3 UFS might result in an OOM: batch-based fetching + // is not supported there, so this method fetches all UFS statuses up front and converts + // them into an iterator.
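// (Illustrative aside, not part of the change.) To make the cost concrete: for a
// prefix holding N children, the fallback below allocates all N UfsStatus objects
// before the iterator is handed back, e.g.
//   UfsStatus[] all = listStatus(path, options);       // O(N) memory up front
//   Iterator<UfsStatus> it = Iterators.forArray(all);  // no incremental paging
// whereas the object-store override (see ObjectUnderFileSystem further down) pages
// through the listing one chunk at a time.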
+ UfsStatus[] result = listStatus(path, options); + if (result == null) { + return null; + } + Arrays.sort(result, Comparator.comparing(UfsStatus::getName)); + return Iterators.forArray(result); + } + + @Override + public void performListingAsync( + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, boolean checkStatus, Consumer onComplete, + Consumer onError) { + mAsyncIOExecutor.submit(() -> { + try { + UfsStatus baseStatus = null; + if (checkStatus) { + try { + baseStatus = getStatus(path); + if (baseStatus == null && !isObjectStorage()) { + onComplete.accept(new UfsLoadResult(Stream.empty(), 0, + null, null, false, false, false)); + return; + } + if (baseStatus != null && (descendantType == DescendantType.NONE + || baseStatus.isFile())) { + onComplete.accept(new UfsLoadResult(Stream.of(baseStatus), 1, + null, new AlluxioURI(baseStatus.getName()), false, + baseStatus.isFile(), isObjectStorage())); + return; + } + } catch (FileNotFoundException e) { + // if we are not using object storage we know nothing exists at the path, + // so just return an empty result + if (!isObjectStorage()) { + onComplete.accept(new UfsLoadResult(Stream.empty(), 0, + null, null, false, false, false)); + return; + } + } + } + UfsStatus[] items = listStatus(path, ListOptions.defaults() + .setRecursive(descendantType == DescendantType.ALL)); + if (items != null) { + if (descendantType == DescendantType.NONE && items.length > 0) { + assert isObjectStorage() && this instanceof ObjectUnderFileSystem; + ObjectUnderFileSystem.ObjectPermissions permissions = + ((ObjectUnderFileSystem) this).getPermissions(); + items = new UfsStatus[] { + new UfsDirectoryStatus("", permissions.getOwner(), permissions.getGroup(), + permissions.getMode())}; + } + Arrays.sort(items, Comparator.comparing(UfsStatus::getName)); + for (UfsStatus item: items) { + // performListingAsync is used by metadata sync v2 + // which expects the name of an item to be a full path + item.setName(PathUtils.concatPath(path, item.getName())); + } + } + if (items != null && items.length == 0) { + items = null; + } + UfsStatus firstItem = baseStatus != null ? baseStatus + : items != null ? items[0] : null; + UfsStatus lastItem = items == null ? firstItem + : items[items.length - 1]; + Stream itemStream = items == null ? Stream.empty() : Arrays.stream(items); + int itemCount = items == null ? 0 : items.length; + if (baseStatus != null) { + itemStream = Stream.concat(Stream.of(baseStatus), itemStream); + itemCount++; + } + onComplete.accept(new UfsLoadResult(itemStream, itemCount, + null, lastItem == null ? 
null : new AlluxioURI(lastItem.getName()), false, + firstItem != null && firstItem.isFile(), isObjectStorage())); + } catch (Throwable t) { + onError.accept(t); + } + }); + } + @Override @Nullable public UfsStatus[] listStatus(String path, ListOptions options) throws IOException { @@ -195,7 +326,7 @@ public UfsStatus[] listStatus(String path, ListOptions options) throws IOExcepti } } } - return returnPaths.toArray(new UfsStatus[returnPaths.size()]); + return returnPaths.toArray(new UfsStatus[0]); } @Override diff --git a/core/common/src/main/java/alluxio/underfs/ContentHashable.java b/core/common/src/main/java/alluxio/underfs/ContentHashable.java new file mode 100644 index 000000000000..2975bfe14204 --- /dev/null +++ b/core/common/src/main/java/alluxio/underfs/ContentHashable.java @@ -0,0 +1,29 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs; + +import java.io.IOException; +import java.util.Optional; + +/** + * Interface for returning the content hash. Instances of {@link java.io.OutputStream} returned by + * {@link UnderFileSystem#create} may implement this interface if the UFS returns the hash of the + * content written when the stream is closed. The content hash will then be used as part of + * the metadata fingerprint when the file is completed on the Alluxio master. + */ +public interface ContentHashable { + /** + * @return the content hash of the file written to the UFS if available + * after the stream has been closed + */ + Optional getContentHash() throws IOException; +} diff --git a/core/common/src/main/java/alluxio/underfs/Fingerprint.java b/core/common/src/main/java/alluxio/underfs/Fingerprint.java index 7fa04358103a..2d41d02432f6 100644 --- a/core/common/src/main/java/alluxio/underfs/Fingerprint.java +++ b/core/common/src/main/java/alluxio/underfs/Fingerprint.java @@ -14,12 +14,13 @@ import alluxio.Constants; import alluxio.security.authorization.AccessControlList; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Splitter; +import org.apache.commons.lang3.StringUtils; import java.util.Collections; import java.util.HashMap; import java.util.Map; -import java.util.regex.Pattern; import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; @@ -43,12 +44,13 @@ public final class Fingerprint { private static final char KVDELIMTER = '|'; private static final char TAGDELIMTER = ' '; - private static final Pattern SANITIZE_REGEX = Pattern.compile("[" + KVDELIMTER - + TAGDELIMTER + "]"); public static final String UNDERSCORE = "_"; private final Map mValues; + private final String[] mSearchList = new String[] {"|", " "}; + private final String[] mReplaceList = new String[] {"_", "_"}; + /** * The possible types of the fingerprint. 
*/ @@ -78,10 +80,7 @@ public enum Tag { * @return the fingerprint object */ public static Fingerprint create(String ufsName, UfsStatus status) { - if (status == null) { - return new Fingerprint(Collections.emptyMap()); - } - return new Fingerprint(Fingerprint.createTags(ufsName, status)); + return Fingerprint.create(ufsName, status, null); } /** @@ -89,14 +88,35 @@ public static Fingerprint create(String ufsName, UfsStatus status) { * * @param ufsName the name of the ufs, should be {@link UnderFileSystem#getUnderFSType()} * @param status the {@link UfsStatus} to create the fingerprint from + * @param contentHash the hash of the contents, if null the hash will be taken from + * the {@link UfsStatus} parameter + * @return the fingerprint object + */ + public static Fingerprint create(String ufsName, UfsStatus status, + @Nullable String contentHash) { + return create(ufsName, status, contentHash, null); + } + + /** + * Parses the input string and returns the fingerprint object. + * + * @param ufsName the name of the ufs, should be {@link UnderFileSystem#getUnderFSType()} + * @param status the {@link UfsStatus} to create the fingerprint from + * @param contentHash the hash of the contents, if null the hash will be taken from + * the {@link UfsStatus} parameter * @param acl the {@link AccessControlList} to create the fingerprint from * @return the fingerprint object */ - public static Fingerprint create(String ufsName, UfsStatus status, AccessControlList acl) { + public static Fingerprint create(String ufsName, UfsStatus status, + @Nullable String contentHash, @Nullable AccessControlList acl) { if (status == null) { return new Fingerprint(Collections.emptyMap()); } - Map tagMap = Fingerprint.createTags(ufsName, status); + return finishCreate(Fingerprint.createTags(ufsName, status, contentHash), acl); + } + + private static Fingerprint finishCreate(Map tagMap, + @Nullable AccessControlList acl) { if (acl != null) { tagMap.put(Tag.ACL, acl.toString()); } @@ -108,9 +128,12 @@ public static Fingerprint create(String ufsName, UfsStatus status, AccessControl * * @param ufsName the name of the ufs, should be {@link UnderFileSystem#getUnderFSType()} * @param status the {@link UfsStatus} to create the tagmap from + * @param contentHash the hash of the contents, if null the hash will be taken from + * the {@link UfsStatus} parameter * @return the tag map object */ - private static Map createTags(String ufsName, UfsStatus status) { + private static Map createTags(String ufsName, UfsStatus status, + @Nullable String contentHash) { Map tagMap = new HashMap<>(); tagMap.put(Tag.UFS, ufsName); tagMap.put(Tag.OWNER, status.getOwner()); @@ -118,7 +141,8 @@ private static Map createTags(String ufsName, UfsStatus status) { tagMap.put(Tag.MODE, String.valueOf(status.getMode())); if (status instanceof UfsFileStatus) { tagMap.put(Tag.TYPE, Type.FILE.name()); - tagMap.put(Tag.CONTENT_HASH, ((UfsFileStatus) status).getContentHash()); + tagMap.put(Tag.CONTENT_HASH, contentHash == null + ? 
((UfsFileStatus) status).getContentHash() : contentHash); } else { tagMap.put(Tag.TYPE, Type.DIRECTORY.name()); } @@ -260,10 +284,11 @@ private Fingerprint(Map values) { } } - private String sanitizeString(String input) { + @VisibleForTesting + String sanitizeString(String input) { if (input == null || input.isEmpty()) { return UNDERSCORE; } - return SANITIZE_REGEX.matcher(input).replaceAll(UNDERSCORE); + return StringUtils.replaceEachRepeatedly(input, mSearchList, mReplaceList); } } diff --git a/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java b/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java new file mode 100644 index 000000000000..a5d3bd1a1ebb --- /dev/null +++ b/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java @@ -0,0 +1,405 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs; + +import alluxio.Constants; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.retry.CountingRetry; +import alluxio.retry.RetryPolicy; +import alluxio.retry.RetryUtils; +import alluxio.util.CommonUtils; +import alluxio.util.io.PathUtils; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.ListeningExecutorService; +import org.apache.commons.codec.binary.Base64; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.security.DigestOutputStream; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; +import javax.annotation.Nullable; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * [Experimental] A stream for writing a file into object storage using streaming upload. + * The data transfer is done using object storage low-level multipart upload. + *
+ * We upload data in partitions. On write(), the data will be persisted to + * a temporary file {@link #mFile} on the local disk. When the data {@link #mPartitionOffset} + * in this temporary file reaches the {@link #mPartitionSize}, the file will be submitted + * to the upload executor {@link #mExecutor} and we do not wait for uploads to finish. + * A new temp file will be created for future writes and the {@link #mPartitionOffset} + * will be reset to zero. This process continues until all the data has been written to temp files. + *
+ * In flush(), we upload the buffered data if it is larger than 5MB + * and wait for all uploads to finish. The temp files are deleted after they have been uploaded successfully. + *
+ * In close(), we upload the last part of data (if one exists), wait for all uploads to finish, + * and complete the multipart upload. + *
+ * close() will not be retried, but all the multipart upload + * related operations (init, upload, complete, and abort) will be retried. + *
+ * If an error occurs and we have no way to recover, we abort the multipart uploads. + * Some multipart uploads may not be completed/aborted in normal ways and need periodic cleanup + * by enabling the {@link PropertyKey#UNDERFS_CLEANUP_ENABLED}. + * When a leader master starts or a cleanup interval is reached, all the multipart uploads + * older than the configured clean age will be cleaned up. + */ +@NotThreadSafe +public abstract class ObjectLowLevelOutputStream extends OutputStream + implements ContentHashable { + protected static final Logger LOG = LoggerFactory.getLogger(ObjectLowLevelOutputStream.class); + + protected final List mTmpDirs; + + /** + * Only parts bigger than 5MB can be uploaded through multipart upload, + * except the last part. + */ + protected static final long UPLOAD_THRESHOLD = 5L * Constants.MB; + + /** Bucket name of the object storage bucket. */ + protected final String mBucketName; + + /** Key of the file when it is uploaded to object storage. */ + protected final String mKey; + + /** The retry policy of this multipart upload. */ + protected final Supplier mRetryPolicy = () -> new CountingRetry(5); + + /** Pre-allocated byte buffer for writing single characters. */ + protected final byte[] mSingleCharWrite = new byte[1]; + + /** The MD5 hash of the file. */ + @Nullable + protected MessageDigest mHash; + + /** Flag to indicate this stream has been closed, to ensure close is only done once. */ + protected boolean mClosed = false; + + /** When the offset reaches the partition size, we upload the temp file. */ + protected long mPartitionOffset; + /** The maximum allowed size of a partition. */ + protected final long mPartitionSize; + + /** + * The local temp file that will be uploaded when it reaches the partition size + * or when flush() is called and this file is bigger than {@link #UPLOAD_THRESHOLD}. + */ + @Nullable + protected File mFile; + /** The output stream to the local temp file. */ + @Nullable + protected OutputStream mLocalOutputStream; + + /** + * Give each upload request a unique and continuous id + * so that object storage knows the part sequence to concatenate the parts to a single object. + */ + private final AtomicInteger mPartNumber; + + /** Executing the upload tasks. */ + private final ListeningExecutorService mExecutor; + + /** Store the future of tags. */ + private final List> mFutures = new ArrayList<>(); + + /** Upload part timeout; null means no timeout. */ + @Nullable + private Long mUploadPartTimeoutMills; + + /** Whether the multipart upload has been initialized. */ + private boolean mMultiPartUploadInitialized = false; + + /** + * Constructs a new stream for writing a file.
+ * + * @param bucketName the name of the bucket + * @param key the key of the file + * @param executor the executor service that runs the upload tasks + * @param streamingUploadPartitionSize the size in bytes for partitions of streaming uploads + * @param ufsConf the object store under file system configuration + */ + public ObjectLowLevelOutputStream( + String bucketName, + String key, + ListeningExecutorService executor, + long streamingUploadPartitionSize, + AlluxioConfiguration ufsConf) { + Preconditions.checkArgument(bucketName != null && !bucketName.isEmpty(), + "Bucket name must not be null or empty."); + mBucketName = bucketName; + mTmpDirs = ufsConf.getList(PropertyKey.TMP_DIRS); + Preconditions.checkArgument(!mTmpDirs.isEmpty(), "No temporary directories available"); + mExecutor = executor; + mKey = key; + initHash(); + mPartitionSize = Math.max(UPLOAD_THRESHOLD, streamingUploadPartitionSize); + mPartNumber = new AtomicInteger(1); + if (ufsConf.isSet(PropertyKey.UNDERFS_OBJECT_STORE_STREAMING_UPLOAD_PART_TIMEOUT)) { + mUploadPartTimeoutMills = + ufsConf.getDuration(PropertyKey.UNDERFS_OBJECT_STORE_STREAMING_UPLOAD_PART_TIMEOUT) + .toMillis(); + } + } + + @Override + public void write(int b) throws IOException { + mSingleCharWrite[0] = (byte) b; + write(mSingleCharWrite); + } + + @Override + public void write(byte[] b) throws IOException { + write(b, 0, b.length); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + if (b == null || len == 0) { + return; + } + Preconditions.checkNotNull(b); + Preconditions.checkArgument(off >= 0 && off <= b.length && len >= 0 && off + len <= b.length); + if (mFile == null) { + initNewFile(); + } + if (mPartitionOffset + len <= mPartitionSize) { + mLocalOutputStream.write(b, off, len); + mPartitionOffset += len; + } else { + int firstLen = (int) (mPartitionSize - mPartitionOffset); + mLocalOutputStream.write(b, off, firstLen); + mPartitionOffset += firstLen; + uploadPart(); + write(b, off + firstLen, len - firstLen); + } + } + + @Override + public void flush() throws IOException { + if (!mMultiPartUploadInitialized) { + return; + } + // We try to minimize the time used by close() + // because the Fuse release() method, which calls close(), is async. + // In flush(), we upload the current writing file if it is bigger than 5 MB, + // and wait for all current uploads to complete. + if (mLocalOutputStream != null) { + mLocalOutputStream.flush(); + } + if (mPartitionOffset > UPLOAD_THRESHOLD) { + uploadPart(); + } + waitForAllPartsUpload(); + } + + @Override + public void close() throws IOException { + if (mClosed) { + return; + } + + // Set the closed flag, we never retry close() even if an exception occurs + mClosed = true; + + // Multi-part upload has not been initialized + if (!mMultiPartUploadInitialized) { + if (mFile == null) { + LOG.debug("Streaming upload output stream closed without uploading any data."); + RetryUtils.retry("put empty object for key " + mKey, () -> createEmptyObject(mKey), + mRetryPolicy.get()); + } else { + try { + mLocalOutputStream.close(); + final String md5 = mHash != null ?
Base64.encodeBase64String(mHash.digest()) : null; + RetryUtils.retry("put object for key " + mKey, () -> putObject(mKey, mFile, md5), + mRetryPolicy.get()); + } finally { + if (!mFile.delete()) { + LOG.error("Failed to delete temporary file @ {}", mFile.getPath()); + } + } + } + return; + } + + try { + if (mFile != null) { + mLocalOutputStream.close(); + int partNumber = mPartNumber.getAndIncrement(); + uploadPart(mFile, partNumber, true); + } + + waitForAllPartsUpload(); + RetryUtils.retry("complete multipart upload", + this::completeMultiPartUploadInternal, mRetryPolicy.get()); + } catch (Exception e) { + LOG.error("Failed to upload {}", mKey, e); + throw new IOException(e); + } + } + + /** + * Creates a new temp file to write to. + */ + private void initNewFile() throws IOException { + mFile = new File(PathUtils.concatPath(CommonUtils.getTmpDir(mTmpDirs), UUID.randomUUID())); + initHash(); + if (mHash != null) { + mLocalOutputStream = + new BufferedOutputStream(new DigestOutputStream(new FileOutputStream(mFile), mHash)); + } else { + mLocalOutputStream = new BufferedOutputStream(new FileOutputStream(mFile)); + } + mPartitionOffset = 0; + LOG.debug("Init new temp file @ {}", mFile.getPath()); + } + + private void initHash() { + try { + mHash = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + LOG.warn("Algorithm not available for MD5 hash.", e); + mHash = null; + } + } + + /** + * Uploads a part asynchronously. + */ + protected void uploadPart() throws IOException { + if (mFile == null) { + return; + } + if (!mMultiPartUploadInitialized) { + RetryUtils.retry("init multipart upload", this::initMultiPartUploadInternal, + mRetryPolicy.get()); + mMultiPartUploadInitialized = true; + } + mLocalOutputStream.close(); + int partNumber = mPartNumber.getAndIncrement(); + uploadPart(new File(mFile.getPath()), partNumber, false); + mFile = null; + mLocalOutputStream = null; + } + + protected void uploadPart(File file, int partNumber, boolean lastPart) { + final String md5 = mHash != null ? Base64.encodeBase64String(mHash.digest()) : null; + Callable callable = () -> { + try { + RetryUtils.retry("upload part for key " + mKey + " and part number " + partNumber, + () -> uploadPartInternal(file, partNumber, lastPart, md5), mRetryPolicy.get()); + return null; + } finally { + // Delete the temp file whether the upload succeeded or failed + if (!file.delete()) { + LOG.error("Failed to delete temporary file @ {}", file.getPath()); + } + } + }; + ListenableFuture futureTag = mExecutor.submit(callable); + mFutures.add(futureTag); + LOG.debug( + "Submit upload part request. key={}, partNum={}, file={}, fileSize={}, lastPart={}.", + mKey, partNumber, file.getPath(), file.length(), lastPart); + } + + protected void abortMultiPartUpload() { + try { + RetryUtils.retry("abort multipart upload for key " + mKey, this::abortMultiPartUploadInternal, + mRetryPolicy.get()); + } catch (IOException e) { + LOG.warn("Unable to abort multipart upload for key '{}' to bucket {}. 
" + + "You may need to enable the periodic cleanup by setting property {} " + "to be true.", mKey, mBucketName, + PropertyKey.UNDERFS_CLEANUP_ENABLED.getName(), e); + } + } + + protected void waitForAllPartsUpload() throws IOException { + try { + for (ListenableFuture future : mFutures) { + if (mUploadPartTimeoutMills == null) { + future.get(); + } else { + future.get(mUploadPartTimeoutMills, TimeUnit.MILLISECONDS); + } + } + } catch (ExecutionException e) { + // There is no way to recover, so cancel all the upload tasks + // and abort the multipart upload + Futures.allAsList(mFutures).cancel(true); + abortMultiPartUpload(); + throw new IOException( + "Part upload failed in multipart upload to " + mKey, e); + } catch (InterruptedException e) { + LOG.warn("Interrupted object upload.", e); + Futures.allAsList(mFutures).cancel(true); + abortMultiPartUpload(); + Thread.currentThread().interrupt(); + } catch (TimeoutException e) { + LOG.error("Timed out when uploading a part"); + Futures.allAsList(mFutures).cancel(true); + abortMultiPartUpload(); + throw new IOException("Timed out uploading part to " + mKey, e); + } + mFutures.clear(); + } + + /** + * Get the part number. + * @return the part number + */ + @VisibleForTesting + public int getPartNumber() { + return mPartNumber.get(); + } + + protected abstract void uploadPartInternal( + File file, + int partNumber, + boolean isLastPart, + @Nullable String md5) + throws IOException; + + protected abstract void initMultiPartUploadInternal() throws IOException; + + protected abstract void completeMultiPartUploadInternal() throws IOException; + + protected abstract void abortMultiPartUploadInternal() throws IOException; + + protected abstract void createEmptyObject(String key) throws IOException; + + protected abstract void putObject(String key, File file, @Nullable String md5) throws IOException; +} diff --git a/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java b/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java index 08465dcb9a3d..6c918b51c73e 100755 --- a/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java +++ b/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java @@ -22,6 +22,7 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; @@ -30,6 +31,7 @@ import alluxio.util.io.PathUtils; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Iterators; import org.apache.http.conn.ConnectTimeoutException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,15 +50,16 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.concurrent.Callable; +import java.util.NavigableMap; +import java.util.TreeMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.function.Supplier; import javax.annotation.Nullable; -import javax.annotation.concurrent.NotThreadSafe; import javax.annotation.concurrent.ThreadSafe; /** @@ -105,7 +108,7 @@ protected ObjectUnderFileSystem(AlluxioURI uri, UnderFileSystemConfiguration ufs /** * Information about a single object in object 
UFS. */ - protected class ObjectStatus { + protected static class ObjectStatus { private static final long INVALID_CONTENT_LENGTH = -1L; private final String mContentHash; @@ -191,12 +194,20 @@ public interface ObjectListingChunk { */ @Nullable ObjectListingChunk getNextChunk() throws IOException; + + /** + * Gets whether there are more chunks to fetch, WITHOUT actually fetching the next chunk. + * @return true if there are, false if there are not, or null if it cannot be determined + */ + default @Nullable Boolean hasNextChunk() { + return null; + } } /** * Permissions in object UFS. */ - public class ObjectPermissions { + public static class ObjectPermissions { final String mOwner; final String mGroup; final short mMode; @@ -238,16 +249,16 @@ public short getMode() { /** * Operations added to this buffer are performed concurrently. + * Note that the {@link #getResult()} method blocks the {@link #add(Object)} method. * - * @param T input type for operation + * @param <T> the input type for the operation */ + @ThreadSafe protected abstract class OperationBuffer { - /** A list of inputs in batches to be operated on in parallel. */ - private ArrayList> mBatches; /** A list of the successful operations for each batch. */ - private ArrayList>> mBatchesResult; + private final ArrayList>> mBatchesResult; /** Buffer for a batch of inputs. */ - private List mCurrentBatchBuffer; + private final List mCurrentBatchBuffer; /** Total number of inputs to be operated on across batches. */ protected int mEntriesAdded; @@ -255,7 +266,6 @@ protected abstract class OperationBuffer { * Construct a new {@link OperationBuffer} instance. */ protected OperationBuffer() { - mBatches = new ArrayList<>(); mBatchesResult = new ArrayList<>(); mCurrentBatchBuffer = new ArrayList<>(); mEntriesAdded = 0; @@ -282,7 +292,7 @@ protected OperationBuffer() { * @param input the input to operate on * @throws IOException if a non-Alluxio error occurs */ - public void add(T input) throws IOException { + public synchronized void add(T input) throws IOException { if (mCurrentBatchBuffer.size() == getBatchSize()) { // Batch is full submitBatch(); @@ -297,7 +307,7 @@ public void add(T input) throws IOException { * @return a list of inputs for successful operations * @throws IOException if a non-Alluxio error occurs */ - public List getResult() throws IOException { + public synchronized List getResult() throws IOException { submitBatch(); List result = new ArrayList<>(); for (Future> list : mBatchesResult) { @@ -325,38 +335,16 @@ public List getResult() throws IOException { */ private void submitBatch() throws IOException { if (mCurrentBatchBuffer.size() != 0) { - int batchNumber = mBatches.size(); - mBatches.add(new ArrayList<>(mCurrentBatchBuffer)); + List batch = new ArrayList<>(mCurrentBatchBuffer); mCurrentBatchBuffer.clear(); - mBatchesResult.add(batchNumber, - mExecutorService.submit(new OperationThread(mBatches.get(batchNumber)))); - } - } - - /** - * Thread class to operate on a batch of objects. - */ - @NotThreadSafe - protected class OperationThread implements Callable> { - List mBatch; - - /** - * Operate on a batch of inputs. 
- * - * @param batch a list of inputs for the current batch - */ - public OperationThread(List batch) { - mBatch = batch; - } - - @Override - public List call() { - try { - return operate(mBatch); - } catch (IOException e) { - // Do not append to success list - return Collections.emptyList(); - } + mBatchesResult.add(mExecutorService.submit(() -> { + try { + return operate(batch); + } catch (IOException e) { + // Do not append to success list + return Collections.emptyList(); + } + })); } } } @@ -465,7 +453,7 @@ public boolean deleteExistingDirectory(String path, DeleteOptions options) throw /** * Object keys added to a {@link DeleteBuffer} will be deleted in batches. */ - @NotThreadSafe + @ThreadSafe protected class DeleteBuffer extends OperationBuffer { /** * Construct a new {@link DeleteBuffer} instance. @@ -536,7 +524,7 @@ public long getSpace(String path, SpaceType type) throws IOException { } @Override - public UfsFileStatus getFileStatus(String path) throws IOException { + public UfsFileStatus getFileStatus(String path, GetFileStatusOptions options) throws IOException { ObjectStatus details = getObjectStatus(stripPrefixIfPresent(path)); if (details != null) { ObjectPermissions permissions = getPermissions(); @@ -616,6 +604,23 @@ public UfsStatus[] listStatus(String path, ListOptions options) return listInternal(path, options); } + @Nullable + @Override + public Iterator listStatusIterable( + String path, ListOptions options, String startAfter, int batchSize) throws IOException { + final ObjectListingChunk chunk = + getObjectListingChunkForPath(path, options.isRecursive(), startAfter, batchSize); + if (chunk == null) { + String keyAsFolder = convertToFolderName(stripPrefixIfPresent(path)); + if (getObjectStatus(keyAsFolder) != null) { + // Path is an empty directory + return Collections.emptyIterator(); + } + return null; + } + return new UfsStatusIterator(path, options.isRecursive(), chunk); + } + @Override public boolean mkdirs(String path, MkdirsOptions options) throws IOException { if (path == null) { @@ -665,31 +670,47 @@ public InputStream openExistingFile(String path, OpenOptions options) throws IOE @Override public boolean renameDirectory(String src, String dst) throws IOException { + if (exists(dst)) { + LOG.error("Unable to rename {} to {} because destination already exists.", src, dst); + return false; + } + // Use a global delete buffer, in order to merge delete object requests + DeleteBuffer deleteBuffer = new DeleteBuffer(); + boolean result = renameDirectoryInternal(src, dst, deleteBuffer); + int fileDeleted = deleteBuffer.getResult().size(); + if (fileDeleted != deleteBuffer.mEntriesAdded) { + LOG.warn("Failed to rename directory, successfully deleted {} files out of {}.", + fileDeleted, deleteBuffer.mEntriesAdded); + return false; + } + return result; + } + + private boolean renameDirectoryInternal(String src, String dst, DeleteBuffer deleteBuffer) + throws IOException { UfsStatus[] children = listInternal(src, ListOptions.defaults()); if (children == null) { LOG.error("Failed to list directory {}, aborting rename.", src); return false; } - if (exists(dst)) { - LOG.error("Unable to rename {} to {} because destination already exists.", src, dst); - return false; - } // Source exists and is a directory, and destination does not exist // Rename the source folder first - if (!copyObject(stripPrefixIfPresent(convertToFolderName(src)), - stripPrefixIfPresent(convertToFolderName(dst)))) { + String srcKey = stripPrefixIfPresent(convertToFolderName(src)); + if 
(!copyObject(srcKey, stripPrefixIfPresent(convertToFolderName(dst)))) { return false; } + deleteBuffer.add(srcKey); + // Rename each child in the src folder to destination/child // a. Since renames are a copy operation, files are added to a buffer and processed concurrently // b. Pseudo-directories are metadata-only operations and are not added to the buffer - RenameBuffer buffer = new RenameBuffer(); + RenameBuffer buffer = new RenameBuffer(deleteBuffer); for (UfsStatus child : children) { String childSrcPath = PathUtils.concatPath(src, child.getName()); String childDstPath = PathUtils.concatPath(dst, child.getName()); if (child.isDirectory()) { // Recursive call - if (!renameDirectory(childSrcPath, childDstPath)) { + if (!renameDirectoryInternal(childSrcPath, childDstPath, deleteBuffer)) { LOG.error("Failed to rename path {} to {}, aborting rename.", childSrcPath, childDstPath); return false; } @@ -704,8 +725,7 @@ public boolean renameDirectory(String src, String dst) throws IOException { filesRenamed, buffer.mEntriesAdded); return false; } - // Delete src and everything under src - return deleteDirectory(src, DeleteOptions.defaults().setRecursive(true)); + return true; } @Override @@ -717,12 +737,18 @@ public boolean renameRenamableDirectory(String src, String dst) throws IOExcepti /** * File paths added to a {@link RenameBuffer} will be renamed concurrently. */ - @NotThreadSafe + @ThreadSafe protected class RenameBuffer extends OperationBuffer> { + private final DeleteBuffer mDeleteBuffer; + /** * Construct a new {@link RenameBuffer} instance. + * + * @param deleteBuffer delete object buffer */ - public RenameBuffer() {} + public RenameBuffer(DeleteBuffer deleteBuffer) { + mDeleteBuffer = deleteBuffer; + } @Override protected int getBatchSize() { @@ -734,7 +760,10 @@ protected List> operate(List> paths) throws IOException { List> succeeded = new ArrayList<>(); for (Pair pathPair : paths) { - if (renameFile(pathPair.getFirst(), pathPair.getSecond())) { + String src = stripPrefixIfPresent(pathPair.getFirst()); + String dst = stripPrefixIfPresent(pathPair.getSecond()); + if (copyObject(src, dst)) { + mDeleteBuffer.add(src); succeeded.add(pathPair); } } @@ -857,8 +886,7 @@ protected int getListingChunkLengthMax() { * @return length of each list request */ protected int getListingChunkLength(AlluxioConfiguration conf) { - return conf.getInt(PropertyKey.UNDERFS_LISTING_LENGTH) > getListingChunkLengthMax() - ? getListingChunkLengthMax() : conf.getInt(PropertyKey.UNDERFS_LISTING_LENGTH); + return Math.min(conf.getInt(PropertyKey.UNDERFS_LISTING_LENGTH), getListingChunkLengthMax()); } /** @@ -930,25 +958,50 @@ protected String getChildName(String child, String parent) throws IOException { * * @param key pseudo-directory key excluding header and bucket * @param recursive whether to request immediate children only, or all descendants + * @param startAfter indicates where the listing starts + * @param batchSize the batch size of each chunk * @return chunked object listing, or null if key is not found */ @Nullable - protected abstract ObjectListingChunk getObjectListingChunk(String key, boolean recursive) - throws IOException; + protected ObjectListingChunk getObjectListingChunk( + String key, boolean recursive, String startAfter, int batchSize) throws IOException { + // Some UFSs haven't implemented getObjectListingChunk(dir, recursive, startAfter, batchSize), + // so fall back to the overload with fewer parameters when startAfter and batchSize are unset. 
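// (Illustrative aside, arguments hypothetical.) The call pattern for this fallback:
//   getObjectListingChunk("dir/", true)           // legacy two-argument form
//   getObjectListingChunk("dir/", true, null, 0)  // delegates to the legacy form below
//   getObjectListingChunk("dir/", true, "c", 100) // requires a UFS-specific override
// UFSs that support paged listings override this four-argument variant; all others
// inherit the rejection of explicit startAfter/batchSize requests.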
+ if (startAfter == null && batchSize == 0) { + return getObjectListingChunk(key, recursive); + } + throw new UnsupportedOperationException("Operation not supported"); + } /** - * Gets a (partial) object listing for the given path. + * Gets a (partial) object listing result for the given key. * - * @param path of pseudo-directory + * @param key pseudo-directory key excluding header and bucket * @param recursive whether to request immediate children only, or all descendants - * @return chunked object listing, or null if the path does not exist as a pseudo-directory + * @return chunked object listing, or null if key is not found */ @Nullable + protected abstract ObjectListingChunk getObjectListingChunk(String key, boolean recursive) + throws IOException; + protected ObjectListingChunk getObjectListingChunkForPath(String path, boolean recursive) throws IOException { + return getObjectListingChunkForPath(path, recursive, null, 0); + } + + /** + * Gets a (partial) object listing for the given path. + * + * @param path of pseudo-directory + * @param recursive whether to request immediate children only, or all descendants + * @return chunked object listing, or null if the path does not exist as a pseudo-directory + */ + @Nullable + protected ObjectListingChunk getObjectListingChunkForPath( + String path, boolean recursive, String startAfter, int batchSize) throws IOException { // Check if anything begins with / String dir = stripPrefixIfPresent(path); - ObjectListingChunk objs = getObjectListingChunk(dir, recursive); + ObjectListingChunk objs = getObjectListingChunk(dir, recursive, startAfter, batchSize); // If there are, this is a folder and we can create the necessary metadata if (objs != null && ((objs.getObjectStatuses() != null && objs.getObjectStatuses().length > 0) @@ -965,6 +1018,86 @@ protected ObjectListingChunk getObjectListingChunkForPath(String path, boolean r return null; } + private void populateUfsStatus( + String keyPrefix, ObjectListingChunk chunk, + boolean isRecursive, Map ufsStatusMap) throws IOException { + // Directories in UFS can be possibly encoded in two different ways: + // (1) as file objects with FOLDER_SUFFIX for directories created through Alluxio or + // (2) as "common prefixes" of other files objects for directories not created through + // Alluxio + // + // Case (1) (and file objects) is accounted for by iterating over chunk.getObjects() while + // case (2) is accounted for by iterating over chunk.getCommonPrefixes(). 
+ // + // An example, with prefix="ufs" and delimiter="/" and LISTING_LENGTH=5 + // - objects.key = ufs/, child = + // - objects.key = ufs/dir1, child = dir1 + // - objects.key = ufs/file, child = file + // - commonPrefix = ufs/dir1/, child = dir1 + // - commonPrefix = ufs/dir2/, child = dir2 + + // Handle case (1) + for (ObjectStatus status : chunk.getObjectStatuses()) { + // Remove parent portion of the key + String child = getChildName(status.getName(), keyPrefix); + if (child.isEmpty() || child.equals(getFolderSuffix())) { + // Removes results equal to the path + continue; + } + ObjectPermissions permissions = getPermissions(); + if (child.endsWith(getFolderSuffix())) { + // Child is a directory + child = CommonUtils.stripSuffixIfPresent(child, getFolderSuffix()); + ufsStatusMap.put(child, new UfsDirectoryStatus(child, permissions.getOwner(), + permissions.getGroup(), permissions.getMode())); + } else { + // Child is a file + ufsStatusMap.put(child, + new UfsFileStatus(child, status.getContentHash(), status.getContentLength(), + status.getLastModifiedTimeMs(), permissions.getOwner(), permissions.getGroup(), + permissions.getMode(), + mUfsConf.getBytes(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT))); + } + } + // Handle case (2) + String[] commonPrefixes; + if (isRecursive) { + // In case of a recursive listing infer pseudo-directories as the commonPrefixes returned + // from the object store is empty for an empty delimiter. + HashSet prefixes = new HashSet<>(); + for (ObjectStatus objectStatus : chunk.getObjectStatuses()) { + String objectName = objectStatus.getName(); + while (objectName.startsWith(keyPrefix) && objectName.contains(PATH_SEPARATOR)) { + objectName = objectName.substring(0, objectName.lastIndexOf(PATH_SEPARATOR)); + if (!objectName.isEmpty()) { + // include the separator with the prefix, to conform to what object stores return + // as common prefixes. + prefixes.add(PathUtils.normalizePath(objectName, PATH_SEPARATOR)); + } + } + } + commonPrefixes = prefixes.toArray(new String[0]); + } else { + commonPrefixes = chunk.getCommonPrefixes(); + } + for (String commonPrefix : commonPrefixes) { + if (commonPrefix.startsWith(keyPrefix)) { + // Remove parent portion of the key + String child = getChildName(commonPrefix, keyPrefix); + // Remove any portion after the last path delimiter + int childNameIndex = child.lastIndexOf(PATH_SEPARATOR); + child = childNameIndex != -1 ? child.substring(0, childNameIndex) : child; + if (!child.isEmpty() && !ufsStatusMap.containsKey(child)) { + // If both a file and a directory existed with the same name, the path will be + // treated as a directory + ObjectPermissions permissions = getPermissions(); + ufsStatusMap.put(child, new UfsDirectoryStatus(child, permissions.getOwner(), + permissions.getGroup(), permissions.getMode())); + } + } + } + } + /** * Get full path of root in object store. * @@ -995,81 +1128,7 @@ protected UfsStatus[] listInternal(String path, ListOptions options) throws IOEx keyPrefix = keyPrefix.equals(PATH_SEPARATOR) ? "" : keyPrefix; Map children = new HashMap<>(); while (chunk != null) { - // Directories in UFS can be possibly encoded in two different ways: - // (1) as file objects with FOLDER_SUFFIX for directories created through Alluxio or - // (2) as "common prefixes" of other files objects for directories not created through - // Alluxio - // - // Case (1) (and file objects) is accounted for by iterating over chunk.getObjects() while - // case (2) is accounted for by iterating over chunk.getCommonPrefixes(). 
- // - // An example, with prefix="ufs" and delimiter="/" and LISTING_LENGTH=5 - // - objects.key = ufs/, child = - // - objects.key = ufs/dir1, child = dir1 - // - objects.key = ufs/file, child = file - // - commonPrefix = ufs/dir1/, child = dir1 - // - commonPrefix = ufs/dir2/, child = dir2 - - // Handle case (1) - for (ObjectStatus status : chunk.getObjectStatuses()) { - // Remove parent portion of the key - String child = getChildName(status.getName(), keyPrefix); - if (child.isEmpty() || child.equals(getFolderSuffix())) { - // Removes results equal to the path - continue; - } - ObjectPermissions permissions = getPermissions(); - if (child.endsWith(getFolderSuffix())) { - // Child is a directory - child = CommonUtils.stripSuffixIfPresent(child, getFolderSuffix()); - children.put(child, new UfsDirectoryStatus(child, permissions.getOwner(), - permissions.getGroup(), permissions.getMode())); - } else { - // Child is a file - children.put(child, - new UfsFileStatus(child, status.getContentHash(), status.getContentLength(), - status.getLastModifiedTimeMs(), permissions.getOwner(), permissions.getGroup(), - permissions.getMode(), - mUfsConf.getBytes(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT))); - } - } - // Handle case (2) - String[] commonPrefixes; - if (options.isRecursive()) { - // In case of a recursive listing infer pseudo-directories as the commonPrefixes returned - // from the object store is empty for an empty delimiter. - HashSet prefixes = new HashSet<>(); - for (ObjectStatus objectStatus : chunk.getObjectStatuses()) { - String objectName = objectStatus.getName(); - while (objectName.startsWith(keyPrefix) && objectName.contains(PATH_SEPARATOR)) { - objectName = objectName.substring(0, objectName.lastIndexOf(PATH_SEPARATOR)); - if (!objectName.isEmpty()) { - // include the separator with the prefix, to conform to what object stores return - // as common prefixes. - prefixes.add(PathUtils.normalizePath(objectName, PATH_SEPARATOR)); - } - } - } - commonPrefixes = prefixes.toArray(new String[prefixes.size()]); - } else { - commonPrefixes = chunk.getCommonPrefixes(); - } - for (String commonPrefix : commonPrefixes) { - if (commonPrefix.startsWith(keyPrefix)) { - // Remove parent portion of the key - String child = getChildName(commonPrefix, keyPrefix); - // Remove any portion after the last path delimiter - int childNameIndex = child.lastIndexOf(PATH_SEPARATOR); - child = childNameIndex != -1 ? child.substring(0, childNameIndex) : child; - if (!child.isEmpty() && !children.containsKey(child)) { - // If both a file and a directory existed with the same name, the path will be - // treated as a directory - ObjectPermissions permissions = getPermissions(); - children.put(child, new UfsDirectoryStatus(child, permissions.getOwner(), - permissions.getGroup(), permissions.getMode())); - } - } - } + populateUfsStatus(keyPrefix, chunk, options.isRecursive(), children); chunk = chunk.getNextChunk(); } UfsStatus[] ret = new UfsStatus[children.size()]; @@ -1081,11 +1140,73 @@ protected UfsStatus[] listInternal(String path, ListOptions options) throws IOEx } /** - * Creates a directory flagged file with the key and folder suffix. - * - * @param key the key to create a folder - * @return true if the operation was successful, false otherwise + * The UFS status iterator that iterates the ufs statuses and fetches the chunk by lazy. 
*/ + public class UfsStatusIterator implements Iterator { + private ObjectListingChunk mChunk; + private final String mKeyPrefix; + private final boolean mIsRecursive; + private Iterator mIterator = null; + private String mLastKey = null; + + /** + * Creates the iterator. + * @param path the path + * @param isRecursive if the listing is recursive + * @param firstChunk the first object listing chunk + */ + public UfsStatusIterator(String path, boolean isRecursive, ObjectListingChunk firstChunk) + throws IOException { + String keyPrefix = PathUtils.normalizePath(stripPrefixIfPresent(path), PATH_SEPARATOR); + keyPrefix = keyPrefix.equals(PATH_SEPARATOR) ? "" : keyPrefix; + mKeyPrefix = keyPrefix; + mIsRecursive = isRecursive; + mChunk = firstChunk; + updateIterator(); + } + + private void updateIterator() throws IOException { + NavigableMap ufsStatusMap = new TreeMap<>(); + populateUfsStatus(mKeyPrefix, mChunk, mIsRecursive, ufsStatusMap); + if (mLastKey != null) { + ufsStatusMap = ufsStatusMap.tailMap(mLastKey, false); + } + mIterator = Iterators.transform(ufsStatusMap.entrySet().iterator(), Map.Entry::getValue); + mLastKey = ufsStatusMap.isEmpty() ? null : ufsStatusMap.lastKey(); + } + + @Override + public boolean hasNext() { + if (mChunk == null) { + return false; + } + if (mIterator.hasNext()) { + return true; + } + if (Boolean.FALSE.equals(mChunk.hasNextChunk())) { + return false; + } + try { + mChunk = mChunk.getNextChunk(); + updateIterator(); + return hasNext(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public UfsStatus next() { + return mIterator.next(); + } + } + + /** + * Creates a directory flagged file with the key and folder suffix. + * + * @param key the key to create a folder + * @return true if the operation was successful, false otherwise + */ protected boolean mkdirsInternal(String key) { return createEmptyObject(convertToFolderName(stripPrefixIfPresent(key))); } diff --git a/core/common/src/main/java/alluxio/underfs/UfsClient.java b/core/common/src/main/java/alluxio/underfs/UfsClient.java new file mode 100644 index 000000000000..bd79d82acdcc --- /dev/null +++ b/core/common/src/main/java/alluxio/underfs/UfsClient.java @@ -0,0 +1,52 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs; + +import alluxio.file.options.DescendantType; +import alluxio.util.RateLimiter; + +import java.util.function.Consumer; +import javax.annotation.Nullable; + +/** + * The async UFS client interface. + */ +public interface UfsClient { + + /** + * Lists the ufs statuses for a given path. The {@link UfsStatus#getName()} + * function for the returned values should include the full path of each + * item from the UFS root (not including the bucket name for object stores). + * It differs from a traditional listing in that if the input variable + * checkStatus is true, the {@link UfsStatus} for the base path should + * be included at the start of the results. The function should return + * immediately, and perform the operation asynchronously. 
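Since the method returns immediately, callers generally bridge the two callbacks into a future. A sketch of one way to drive it, where ufsClient is an assumed UfsClient instance and the arguments are illustrative:

    // Bridge the completion/error callbacks into a CompletableFuture.
    CompletableFuture<UfsLoadResult> future = new CompletableFuture<>();
    ufsClient.performListingAsync("data/", null /* continuationToken */,
        null /* startAfter */, DescendantType.ONE, true /* checkStatus */,
        future::complete, future::completeExceptionally);
    // Block only at the point where the result is actually needed.
    UfsLoadResult result = future.join();
    result.getItems().forEach(status -> System.out.println(status.getName()));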
+ * @param path the path in ufs + * @param continuationToken the continuation token + * @param startAfter the start after string where the loading starts from + * @param descendantType the load descendant type (NONE/ONE/ALL) + * @param checkStatus if true the call will perform a GetStatus on the path + * to see if an object exists, which should be returned + * as part of the result + * @param onComplete the callback when the load is complete + * @param onError the callback when the load encountered an error + */ + void performListingAsync( + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, boolean checkStatus, Consumer onComplete, + Consumer onError); + + /** + * @return the rate limiter + */ + RateLimiter getRateLimiter(); +} diff --git a/core/common/src/main/java/alluxio/underfs/UfsLoadResult.java b/core/common/src/main/java/alluxio/underfs/UfsLoadResult.java new file mode 100644 index 000000000000..e11da19d1fa4 --- /dev/null +++ b/core/common/src/main/java/alluxio/underfs/UfsLoadResult.java @@ -0,0 +1,110 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs; + +import alluxio.AlluxioURI; + +import java.util.Optional; +import java.util.stream.Stream; +import javax.annotation.Nullable; + +/** + The UfsLoadResult represents the result of a load operation + on an Under File System (UFS). + It contains information about the loaded items, such as the count, + whether it is truncated or not, and the continuation token. + */ +public class UfsLoadResult { + + private final Stream mItems; + private final String mContinuationToken; + private final boolean mIsTruncated; + private final int mItemsCount; + private final AlluxioURI mLastItem; + private final boolean mFirstIsFile; + private final boolean mIsObjectStore; + + /** + * Constructs a new instance of {@link UfsLoadResult}. 
+ * + * @param items the stream of loaded items + * @param itemsCount the count of loaded items + * @param continuationToken the continuation token for loading more items + * @param lastItem the URI of the last item that was loaded + * @param isTruncated whether the load operation was truncated due to reaching a limit + * @param firstIsFile whether the first item in the stream is a file + * @param isObjectStore whether the under file system is an object store + */ + public UfsLoadResult( + Stream items, int itemsCount, @Nullable String continuationToken, + @Nullable AlluxioURI lastItem, boolean isTruncated, boolean firstIsFile, + boolean isObjectStore) { + mItems = items; + mContinuationToken = continuationToken; + mIsTruncated = isTruncated; + mItemsCount = itemsCount; + mLastItem = lastItem; + mFirstIsFile = firstIsFile; + mIsObjectStore = isObjectStore; + } + + /** + * @return true if the under file system is an object store, false otherwise + */ + public boolean isIsObjectStore() { + return mIsObjectStore; + } + + /** + * @return true if the first item in the stream is a file, false otherwise + */ + public boolean isFirstFile() { + return mFirstIsFile; + } + + /** + * @return an optional containing the URI of the last item that was loaded, + * or empty if no items were loaded + */ + public Optional getLastItem() { + return Optional.ofNullable(mLastItem); + } + + /** + * @return the count of loaded items + */ + public int getItemsCount() { + return mItemsCount; + } + + /** + * @return true if the load operation was truncated, false otherwise + */ + public boolean isTruncated() { + return mIsTruncated; + } + + /** + * @return the stream of loaded items + */ + public Stream getItems() { + return mItems; + } + + /** + * @return the continuation token for loading more items, + * or null if there are no more items to load + */ + public String getContinuationToken() { + return mContinuationToken; + } +} diff --git a/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java b/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java index b490feff0103..4d13b422aa15 100755 --- a/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java +++ b/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java @@ -24,6 +24,7 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; @@ -37,6 +38,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import java.util.Map; import javax.annotation.Nullable; @@ -56,7 +58,7 @@ @PublicApi @ThreadSafe // TODO(adit); API calls should use a URI instead of a String wherever appropriate -public interface UnderFileSystem extends Closeable { +public interface UnderFileSystem extends Closeable, UfsClient { /** * The factory for the {@link UnderFileSystem}. */ @@ -422,7 +424,20 @@ default AlluxioConfiguration getConfiguration() throws IOException { * @return the file status * @throws FileNotFoundException when the path does not exist */ - UfsFileStatus getFileStatus(String path) throws IOException; + default UfsFileStatus getFileStatus(String path) throws IOException { + return getFileStatus(path, GetFileStatusOptions.defaults()); + } + + /** + * Gets the file status. 
The caller must already know the path is a file. This method will + * throw an exception if the path exists, but is a directory. + * + * @param path the path to the file + * @param options method options + * @return the file status + * @throws FileNotFoundException when the path does not exist + */ + UfsFileStatus getFileStatus(String path, GetFileStatusOptions options) throws IOException; /** * Gets the file status. @@ -461,9 +476,20 @@ default AlluxioConfiguration getConfiguration() throws IOException { * @param path the path to compute the fingerprint for * @return the string representing the fingerprint */ - default Fingerprint getParsedFingerprint(String path) { - return Fingerprint.parse(getFingerprint(path)); - } + Fingerprint getParsedFingerprint(String path); + + /** + * Same as {@link #getParsedFingerprint(String)} except, will use the given content hash + * as the {@link alluxio.underfs.Fingerprint.Tag#CONTENT_HASH} field of the fingerprint + * if non-null. This is intended to be used when the file is already in Alluxio and + * a fingerprint is being created based on that file where the content hash has already + * been computed. + * @param path the path to compute the fingerprint for + * @param contentHash is used as the {@link alluxio.underfs.Fingerprint.Tag#CONTENT_HASH} + * field when creating the fingerprint. + * @return the string representing the fingerprint + */ + Fingerprint getParsedFingerprint(String path, @Nullable String contentHash); /** * An {@link UnderFileSystem} may be composed of one or more "physical UFS"s. This method is used @@ -613,6 +639,20 @@ default Fingerprint getParsedFingerprint(String path) { @Nullable UfsStatus[] listStatus(String path, ListOptions options) throws IOException; + /** + * Lists the ufs statuses iteratively. + * + * @param path the abstract pathname to list + * @param options for list directory + * @param startAfter the start after token + * @param batchSize the batch size + * @return An iterator of ufs status. Returns + * {@code null} if this abstract pathname does not denote a directory. + */ + @Nullable + Iterator listStatusIterable( + String path, ListOptions options, String startAfter, int batchSize) throws IOException; + /** * Creates the directory named by this abstract pathname. If the folder already exists, the method * returns false. The method creates any necessary but nonexistent parent directories. 
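A usage sketch for the new iterative listing API; ufs is an assumed, initialized UnderFileSystem, and the path and batch size are illustrative:

    // Stream a large directory in batches instead of materializing one
    // listStatus() array; a null return means the path is not a directory.
    Iterator<UfsStatus> it = ufs.listStatusIterable(
        "/bucket/dir", ListOptions.defaults().setRecursive(true),
        null /* startAfter */, 1000 /* batchSize */);
    if (it != null) {
      while (it.hasNext()) {
        UfsStatus status = it.next();
        System.out.println(status.getName());
      }
    }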
diff --git a/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java b/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java index a3deff1d7541..54377d172982 100755 --- a/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java +++ b/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java @@ -17,7 +17,9 @@ import alluxio.collections.Pair; import alluxio.conf.AlluxioConfiguration; import alluxio.conf.PropertyKey; +import alluxio.exception.runtime.InternalRuntimeException; import alluxio.exception.status.UnimplementedException; +import alluxio.file.options.DescendantType; import alluxio.metrics.Metric; import alluxio.metrics.MetricInfo; import alluxio.metrics.MetricsSystem; @@ -28,21 +30,26 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; +import alluxio.util.RateLimiter; import alluxio.util.SecurityUtils; import com.codahale.metrics.Timer; import com.google.common.base.Preconditions; +import com.google.common.collect.Iterators; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.function.Consumer; import javax.annotation.Nullable; /** @@ -520,7 +527,8 @@ public String toString() { } @Override - public UfsFileStatus getFileStatus(final String path) throws IOException { + public UfsFileStatus getFileStatus(final String path, GetFileStatusOptions options) + throws IOException { return call(new UfsCallable() { @Override public UfsFileStatus call() throws IOException { @@ -609,6 +617,31 @@ public String toString() { } } + @Override + public Fingerprint getParsedFingerprint(String path, @Nullable String contentHash) { + try { + return call(new UfsCallable() { + @Override + public Fingerprint call() { + return mUnderFileSystem.getParsedFingerprint(path, contentHash); + } + + @Override + public String methodName() { + return "GetParsedFingerprint"; + } + + @Override + public String toString() { + return String.format("path=%s, contentHash=%s", path, contentHash); + } + }); + } catch (IOException e) { + // This is not possible. 
+ return Fingerprint.INVALID_FINGERPRINT; + } + } + @Override public UfsMode getOperationMode(Map physicalUfsState) { return mUnderFileSystem.getOperationMode(physicalUfsState); @@ -790,6 +823,38 @@ public String toString() { }); } + @Override + public Iterator listStatusIterable( + String path, ListOptions options, String startAfter, + int batchSize) throws IOException { + return call(new UfsCallable>() { + @Override + public Iterator call() throws IOException { + Iterator result = + mUnderFileSystem.listStatusIterable(path, options, startAfter, batchSize); + return filterInvalidPaths(result, path); + } + + @Override + public String methodName() { + return "ListStatusIterable"; + } + + @Override + public String toString() { + return String.format("path=%s, options=%s", path, options); + } + }); + } + + @Nullable + Iterator filterInvalidPaths(Iterator statuses, String listedPath) { + if (statuses == null) { + return null; + } + return Iterators.filter(statuses, (it) -> !it.getName().contains("?")); + } + @Nullable private UfsStatus[] filterInvalidPaths(UfsStatus[] statuses, String listedPath) { // This is a temporary fix to prevent us from choking on paths containing '?'. @@ -1199,6 +1264,42 @@ public UnderFileSystem getUnderFileSystem() { return mUnderFileSystem; } + @Override + public void performListingAsync( + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, boolean checkStatus, Consumer onComplete, + Consumer onError) { + try { + call(new UfsCallable() { + @Override + public Void call() { + mUnderFileSystem.performListingAsync(path, continuationToken, startAfter, + descendantType, checkStatus, onComplete, onError); + return null; + } + + @Override + public String methodName() { + return "PerformListingAsync"; + } + + @Override + public String toString() { + return String.format("path=%s, continuationToken=%s, startAfter=%s, descendantType=%s," + + " checkStatus=%s", + path, continuationToken, startAfter, descendantType, checkStatus); + } + }); + } catch (IOException e) { + throw new InternalRuntimeException("should not reach"); + } + } + + @Override + public RateLimiter getRateLimiter() { + return mUnderFileSystem.getRateLimiter(); + } + /** * Interface representing a callable to the under storage system which throws an * {@link IOException} if an error occurs during the external communication. diff --git a/core/common/src/main/java/alluxio/underfs/options/GetFileStatusOptions.java b/core/common/src/main/java/alluxio/underfs/options/GetFileStatusOptions.java new file mode 100644 index 000000000000..d9a55c73e691 --- /dev/null +++ b/core/common/src/main/java/alluxio/underfs/options/GetFileStatusOptions.java @@ -0,0 +1,42 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs.options; + +/** + * Method options for getting the status of a file in {@link alluxio.underfs.UnderFileSystem}. 
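Combined with the new getFileStatus overload, these options let a caller opt into a backend-computed content hash. A short sketch, with ufs assumed to be an initialized UnderFileSystem and the path illustrative:

    // Ask the UFS for the real content hash along with the status.
    UfsFileStatus status = ufs.getFileStatus("/bucket/dir/file",
        GetFileStatusOptions.defaults().setIncludeRealContentHash(true));
    System.out.println(status.getContentHash());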
+ */ +public class GetFileStatusOptions { + private boolean mIncludeRealContentHash = false; + + /** + * @return whether include real content hash + */ + public boolean isIncludeRealContentHash() { + return mIncludeRealContentHash; + } + + /** + * @param includeRealContentHash include real content hash flag value + * @return the updated options object + */ + public GetFileStatusOptions setIncludeRealContentHash(boolean includeRealContentHash) { + mIncludeRealContentHash = includeRealContentHash; + return this; + } + + /** + * @return the default {@link GetFileStatusOptions} + */ + public static GetFileStatusOptions defaults() { + return new GetFileStatusOptions(); + } +} diff --git a/core/common/src/main/java/alluxio/util/CommonUtils.java b/core/common/src/main/java/alluxio/util/CommonUtils.java index e6a774391141..600ecc4d5188 100644 --- a/core/common/src/main/java/alluxio/util/CommonUtils.java +++ b/core/common/src/main/java/alluxio/util/CommonUtils.java @@ -203,8 +203,7 @@ public static String argsToString(String separator, T... args) { * @return an array of strings */ public static String[] toStringArray(ArrayList src) { - String[] ret = new String[src.size()]; - return src.toArray(ret); + return src.toArray(new String[0]); } /** @@ -377,7 +376,7 @@ public static T waitForResult(String description, Supplier objectSupplier T value; long start = getCurrentMs(); int interval = options.getInterval(); - int timeout = options.getTimeoutMs(); + long timeout = options.getTimeoutMs(); while (condition.apply(value = objectSupplier.get()) != true) { if (timeout != WaitForOptions.NEVER && getCurrentMs() - start > timeout) { throw new TimeoutException("Timed out waiting for " + description + " options: " + options diff --git a/core/common/src/main/java/alluxio/util/IteratorUtils.java b/core/common/src/main/java/alluxio/util/IteratorUtils.java new file mode 100644 index 000000000000..a36fc8e8ea59 --- /dev/null +++ b/core/common/src/main/java/alluxio/util/IteratorUtils.java @@ -0,0 +1,32 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util; + +import java.util.Iterator; + +/** + * Util for iterators. 
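A minimal sketch of the helper; the iterator contents are illustrative:

    // nextOrNull collapses the hasNext()/next() pair into a single call.
    Iterator<String> it = Collections.singletonList("a").iterator();
    String first = IteratorUtils.nextOrNull(it);  // "a"
    String second = IteratorUtils.nextOrNull(it); // null: iterator exhausted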
+ */ +public class IteratorUtils { + /** + * @param iterator the iterator + * @return the next element in the iterator or null if hasNext() returns false + * @param the type of elements returned by the iterator + */ + public static T nextOrNull(Iterator iterator) { + if (iterator.hasNext()) { + return iterator.next(); + } + return null; + } +} + diff --git a/core/common/src/main/java/alluxio/util/ObjectSizeCalculator.java b/core/common/src/main/java/alluxio/util/ObjectSizeCalculator.java index 45458c6a7526..e865bd52c1c7 100644 --- a/core/common/src/main/java/alluxio/util/ObjectSizeCalculator.java +++ b/core/common/src/main/java/alluxio/util/ObjectSizeCalculator.java @@ -459,7 +459,7 @@ public ClassSizeInfo(Class clazz) { } mFieldSize = fieldsSize; mObjectSize = roundTo(mObjectHeaderSize + fieldsSize, mObjectPadding); - mReferencedFields = referenceFields.toArray(new Field[referenceFields.size()]); + mReferencedFields = referenceFields.toArray(new Field[0]); } void visit(Object obj, ObjectSizeCalculator calc) { diff --git a/core/common/src/main/java/alluxio/util/RateLimiter.java b/core/common/src/main/java/alluxio/util/RateLimiter.java new file mode 100644 index 000000000000..c2fd231c4747 --- /dev/null +++ b/core/common/src/main/java/alluxio/util/RateLimiter.java @@ -0,0 +1,60 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util; + +import java.util.Optional; + +/** + * Used to limit the rate of operations. This rate limiter is not thread safe + * and the operations are non-blocking. It is used by acquiring a permit for + * each operation, then checking how the operation should wait by calling + * {@link RateLimiter#getWaitTimeNanos(long)}. + */ +public interface RateLimiter { + + /** + * Acquire a permit for the next operation. + * @return {@link Optional#empty()} if no waiting is needed, otherwise + * the value contained in the returned optional is the permit, which + * can be used in calls to {@link RateLimiter#getWaitTimeNanos} + * to see how long to wait for the operation to be ready. + */ + Optional acquire(); + + /** + * Checks how long is needed to wait for this permit to be ready. 
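Because the limiter never blocks, waiting is the caller's responsibility: acquire a permit, ask how long that permit still has to wait, and park for that long. A pacing sketch, assuming the operations list is supplied by the caller:

    // Pace roughly 100 operations per second without blocking in the limiter.
    RateLimiter limiter = RateLimiter.createRateLimiter(100);
    for (Runnable op : operations) {
      Optional<Long> permit = limiter.acquire();
      if (permit.isPresent()) {
        // parkNanos returns immediately if the wait time is already <= 0.
        LockSupport.parkNanos(limiter.getWaitTimeNanos(permit.get()));
      }
      op.run();
    }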
+ * @param permit the permit returned by {@link RateLimiter#acquire()} + * @return the amount of time needed to wait in nanoseconds + */ + long getWaitTimeNanos(long permit); + + /** + * @param permitsPerSecond permits per second + * @return a rate limiter + */ + static RateLimiter createRateLimiter(long permitsPerSecond) { + if (permitsPerSecond <= 0) { + return new RateLimiter() { + @Override + public Optional acquire() { + return Optional.empty(); + } + + @Override + public long getWaitTimeNanos(long permit) { + return 0; + } + }; + } + return new SimpleRateLimiter(permitsPerSecond); + } +} diff --git a/core/common/src/main/java/alluxio/util/SimpleRateLimiter.java b/core/common/src/main/java/alluxio/util/SimpleRateLimiter.java new file mode 100644 index 000000000000..fb2959371155 --- /dev/null +++ b/core/common/src/main/java/alluxio/util/SimpleRateLimiter.java @@ -0,0 +1,65 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Ticker; + +import java.time.Duration; +import java.util.Optional; + +/** + * A basic implementation of {@link RateLimiter}. + */ +public class SimpleRateLimiter implements RateLimiter { + + final Ticker mTicker; + final long mMinDuration; + + long mLastAcquire = 0; + + SimpleRateLimiter(long permitsPerSecond) { + this(permitsPerSecond, new Ticker() { + @Override + public long read() { + return System.nanoTime(); + } + }); + } + + /** + * Creates a simple rate limiter for testing purpose. + * @param permitsPerSecond permits per second + * @param ticker the ticker + */ + @VisibleForTesting + public SimpleRateLimiter(long permitsPerSecond, Ticker ticker) { + mTicker = ticker; + mMinDuration = Duration.ofSeconds(1).toNanos() / permitsPerSecond; + } + + @Override + public long getWaitTimeNanos(long permit) { + return permit - mTicker.read(); + } + + @Override + public Optional acquire() { + long nxtElapsed = mTicker.read(); + if (nxtElapsed - mLastAcquire >= mMinDuration) { + mLastAcquire = nxtElapsed; + return Optional.empty(); + } + mLastAcquire += mMinDuration; + return Optional.of(mLastAcquire); + } +} diff --git a/core/common/src/main/java/alluxio/util/ThreadUtils.java b/core/common/src/main/java/alluxio/util/ThreadUtils.java index 8600bf4d60ca..966835c818bc 100644 --- a/core/common/src/main/java/alluxio/util/ThreadUtils.java +++ b/core/common/src/main/java/alluxio/util/ThreadUtils.java @@ -116,44 +116,24 @@ private static String getTaskName(long id, String name) { /** * Prints the information and stack traces of all threads. + * In order not to pause the JVM when there are tons of threads, thread stacks are printed + * one by one. So the thread stacks are not guaranteed to be based on one consistent + * snapshot. 
* * @param stream the stream to * @param title a string title for the stack trace */ - public static synchronized void printThreadInfo(PrintStream stream, - String title) { - final int STACK_DEPTH = 20; - boolean contention = THREAD_BEAN.isThreadContentionMonitoringEnabled(); - long[] threadIds = THREAD_BEAN.getAllThreadIds(); + public static synchronized void printThreadInfo(PrintStream stream, String title) { stream.println("Process Thread Dump: " + title); - stream.println(threadIds.length + " active threads"); - for (long tid : threadIds) { - ThreadInfo info = THREAD_BEAN.getThreadInfo(tid, STACK_DEPTH); + stream.println(THREAD_BEAN.getThreadCount() + " active threads"); + long[] threadIds = THREAD_BEAN.getAllThreadIds(); + for (long id : threadIds) { + ThreadInfo info = THREAD_BEAN.getThreadInfo(id, Integer.MAX_VALUE); if (info == null) { - stream.println(" Inactive"); + // The thread is no longer active, ignore continue; } - stream.println("Thread " - + getTaskName(info.getThreadId(), info.getThreadName()) + ":"); - Thread.State state = info.getThreadState(); - stream.println(" State: " + state); - stream.println(" Blocked count: " + info.getBlockedCount()); - stream.println(" Waited count: " + info.getWaitedCount()); - if (contention) { - stream.println(" Blocked time: " + info.getBlockedTime()); - stream.println(" Waited time: " + info.getWaitedTime()); - } - if (state == Thread.State.WAITING) { - stream.println(" Waiting on " + info.getLockName()); - } else if (state == Thread.State.BLOCKED) { - stream.println(" Blocked on " + info.getLockName()); - stream.println(" Blocked by " - + getTaskName(info.getLockOwnerId(), info.getLockOwnerName())); - } - stream.println(" Stack:"); - for (StackTraceElement frame : info.getStackTrace()) { - stream.println(" " + frame.toString()); - } + stream.print(info.toString()); } stream.flush(); } diff --git a/core/common/src/main/java/alluxio/util/WaitForOptions.java b/core/common/src/main/java/alluxio/util/WaitForOptions.java index 04d9bf72b3cf..d69ad7909eb9 100644 --- a/core/common/src/main/java/alluxio/util/WaitForOptions.java +++ b/core/common/src/main/java/alluxio/util/WaitForOptions.java @@ -24,7 +24,7 @@ public final class WaitForOptions { /** How often to check for completion. */ private int mIntervalMs; /** How long to wait before giving up. */ - private int mTimeoutMs; + private long mTimeoutMs; private WaitForOptions() {} @@ -45,7 +45,7 @@ public int getInterval() { /** * @return the timeout */ - public int getTimeoutMs() { + public long getTimeoutMs() { return mTimeoutMs; } @@ -62,7 +62,7 @@ public WaitForOptions setInterval(int intervalMs) { * @param timeoutMs the timeout to use (in milliseconds) * @return the updated options object */ - public WaitForOptions setTimeoutMs(int timeoutMs) { + public WaitForOptions setTimeoutMs(long timeoutMs) { mTimeoutMs = timeoutMs; return this; } diff --git a/core/common/src/main/java/alluxio/util/compression/DirectoryMarshaller.java b/core/common/src/main/java/alluxio/util/compression/DirectoryMarshaller.java new file mode 100644 index 000000000000..d03bc19d5a8c --- /dev/null +++ b/core/common/src/main/java/alluxio/util/compression/DirectoryMarshaller.java @@ -0,0 +1,72 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util.compression; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Path; + +/** + * Interface for a directory marshaller to follow. + */ +public interface DirectoryMarshaller { + /** + * Writes the contents of path ot outputStream such that it can be read by + * {@link #read(Path, InputStream)}. + * @param path the directory to marshall + * @param outputStream the output stream that the marshalled information + * @return the number of bytes read in path + */ + long write(Path path, OutputStream outputStream) throws IOException, InterruptedException; + + /** + * Reads the content from the inputStream and writes them to the specified path. + * @param path the output path + * @param inputStream the stream to read the data from + * @return the number of bytes written to path + */ + long read(Path path, InputStream inputStream) throws IOException; + + /** + * An enum to represent the different {@link DirectoryMarshaller} types. + */ + enum Type { + NO_COMPRESSION, + GZIP, + TAR_GZIP, + } + + /** + * Factory to access the DirectoryMarshaller. + */ + class Factory { + /** + * @return a {@link DirectoryMarshaller} + */ + public static DirectoryMarshaller create() { + Type compressionType = Configuration.getEnum( + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_TYPE, Type.class); + switch (compressionType) { + case GZIP: + return new GzipMarshaller(); + case TAR_GZIP: + return new TarGzMarshaller(); + default: + return new NoCompressionMarshaller(); + } + } + } +} diff --git a/core/common/src/main/java/alluxio/util/compression/GzipMarshaller.java b/core/common/src/main/java/alluxio/util/compression/GzipMarshaller.java new file mode 100644 index 000000000000..a0693d9461a4 --- /dev/null +++ b/core/common/src/main/java/alluxio/util/compression/GzipMarshaller.java @@ -0,0 +1,49 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util.compression; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; +import org.apache.commons.compress.compressors.gzip.GzipParameters; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Path; + +/** + * Applies a simple Gzip compression to the {@link NoCompressionMarshaller}. 
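A round-trip sketch of the marshaller contract; the factory picks the concrete implementation from configuration, and the snapshot paths here are illustrative (note that write also declares InterruptedException, which the caller must handle):

    // Marshal a directory into a single stream, then restore it elsewhere.
    DirectoryMarshaller marshaller = DirectoryMarshaller.Factory.create();
    try (OutputStream out = Files.newOutputStream(Paths.get("/tmp/snapshot.bin"))) {
      long bytesWritten = marshaller.write(Paths.get("/tmp/journal"), out);
    }
    try (InputStream in = Files.newInputStream(Paths.get("/tmp/snapshot.bin"))) {
      long bytesRead = marshaller.read(Paths.get("/tmp/restored"), in);
    }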
+ */ +public class GzipMarshaller implements DirectoryMarshaller { + private final int mSnapshotCompressionLevel = Configuration.getInt( + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL); + private final NoCompressionMarshaller mMarshaller = new NoCompressionMarshaller(); + + @Override + public long write(Path path, OutputStream outputStream) throws IOException, InterruptedException { + GzipParameters params = new GzipParameters(); + params.setCompressionLevel(mSnapshotCompressionLevel); + GzipCompressorOutputStream zipStream = new GzipCompressorOutputStream(outputStream, params); + long bytes = mMarshaller.write(path, zipStream); + zipStream.finish(); + return bytes; + } + + @Override + public long read(Path path, InputStream inputStream) throws IOException { + InputStream zipStream = new GzipCompressorInputStream(inputStream); + return mMarshaller.read(path, zipStream); + } +} diff --git a/core/common/src/main/java/alluxio/util/compression/NoCompressionMarshaller.java b/core/common/src/main/java/alluxio/util/compression/NoCompressionMarshaller.java new file mode 100644 index 000000000000..fcc7e4f735fc --- /dev/null +++ b/core/common/src/main/java/alluxio/util/compression/NoCompressionMarshaller.java @@ -0,0 +1,85 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util.compression; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Simple marshaller that applies no compression. 
+ */ +public class NoCompressionMarshaller implements DirectoryMarshaller { + private static final char DIR_CHAR = 'd'; + private static final char FILE_CHAR = 'f'; + + @Override + public long write(Path path, OutputStream outputStream) throws IOException, InterruptedException { + long totalBytesCopied = 0; + try (final Stream stream = Files.walk(path); + DataOutputStream dataOS = new DataOutputStream(outputStream)) { + for (Path subpath : stream.collect(Collectors.toList())) { + byte[] relativePath = path.relativize(subpath).toString().getBytes(); + dataOS.write(relativePath.length); + dataOS.write(relativePath); + if (subpath.toFile().isDirectory()) { + dataOS.writeChar(DIR_CHAR); + } else { + dataOS.writeChar(FILE_CHAR); + dataOS.writeLong(FileUtils.sizeOf(subpath.toFile())); + try (InputStream fileIn = new BufferedInputStream(Files.newInputStream(subpath))) { + totalBytesCopied += IOUtils.copyLarge(fileIn, dataOS); + } + } + } + } + return totalBytesCopied; + } + + @Override + public long read(Path path, InputStream inputStream) throws IOException { + path.toFile().mkdirs(); + long totalBytesRead = 0; + try (DataInputStream dataIS = new DataInputStream(inputStream)) { + int pathSize; + while ((pathSize = dataIS.read()) != -1) { + byte[] relativePath = new byte[pathSize]; + dataIS.read(relativePath); + File filePath = new File(path.toFile(), new String(relativePath)); + char c = dataIS.readChar(); + if (c == DIR_CHAR) { + filePath.mkdirs(); + } else { + filePath.getParentFile().mkdirs(); + long fileSize = dataIS.readLong(); + try (OutputStream fileOut = + new BufferedOutputStream(Files.newOutputStream(filePath.toPath()))) { + totalBytesRead += IOUtils.copyLarge(dataIS, fileOut, 0, fileSize); + } + } + } + } + return totalBytesRead; + } +} diff --git a/core/server/common/src/main/java/alluxio/util/ParallelZipUtils.java b/core/common/src/main/java/alluxio/util/compression/ParallelZipUtils.java similarity index 89% rename from core/server/common/src/main/java/alluxio/util/ParallelZipUtils.java rename to core/common/src/main/java/alluxio/util/compression/ParallelZipUtils.java index 3371bb1fbaf6..7258c783ef9b 100644 --- a/core/server/common/src/main/java/alluxio/util/ParallelZipUtils.java +++ b/core/common/src/main/java/alluxio/util/compression/ParallelZipUtils.java @@ -9,7 +9,7 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. 
*/ -package alluxio.util; +package alluxio.util.compression; import static java.util.stream.Collectors.toList; @@ -22,7 +22,10 @@ import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore; import org.apache.commons.compress.parallel.InputStreamSupplier; +import org.apache.commons.compress.parallel.ScatterGatherBackingStore; +import org.apache.commons.compress.parallel.ScatterGatherBackingStoreSupplier; import org.apache.commons.io.IOUtils; import org.apache.commons.io.input.NullInputStream; import org.slf4j.Logger; @@ -42,6 +45,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorService; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Stream; /** @@ -50,6 +54,17 @@ public class ParallelZipUtils { private static final Logger LOG = LoggerFactory.getLogger(ParallelZipUtils.class); + private static class BasicBackingStoreSupplier implements ScatterGatherBackingStoreSupplier { + final AtomicInteger mStoreNum = new AtomicInteger(0); + + @Override + public ScatterGatherBackingStore get() throws IOException { + final File tempFile = File.createTempFile("zipUtilsParallelScatter", "n" + + mStoreNum.incrementAndGet()); + return new FileBasedScatterGatherBackingStore(tempFile); + } + } + /** * Creates a zipped archive from the given path in parallel, streaming the data * to the give output stream. @@ -67,10 +82,11 @@ public static void compress(Path dirPath, OutputStream outputStream, int poolSiz LOG.info("compress in parallel for path {}", dirPath); ExecutorService executor = ExecutorServiceFactories.fixedThreadPool( "parallel-zip-compress-pool", poolSize).create(); - ParallelScatterZipCreator parallelScatterZipCreator = new ParallelScatterZipCreator(executor); + + ParallelScatterZipCreator parallelScatterZipCreator = new ParallelScatterZipCreator(executor, + new BasicBackingStoreSupplier(), compressionLevel); ZipArchiveOutputStream zipArchiveOutputStream = new ZipArchiveOutputStream(outputStream); zipArchiveOutputStream.setUseZip64(Zip64Mode.Always); - zipArchiveOutputStream.setLevel(compressionLevel); try { try (final Stream stream = Files.walk(dirPath)) { diff --git a/core/common/src/main/java/alluxio/util/compression/TarGzMarshaller.java b/core/common/src/main/java/alluxio/util/compression/TarGzMarshaller.java new file mode 100644 index 000000000000..59597c417134 --- /dev/null +++ b/core/common/src/main/java/alluxio/util/compression/TarGzMarshaller.java @@ -0,0 +1,38 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.util.compression; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Path; + +/** + * Marshall directory following the .tar.gz specification. + */ +public class TarGzMarshaller implements DirectoryMarshaller { + private final int mSnapshotCompressionLevel = Configuration.getInt( + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL); + + @Override + public long write(Path path, OutputStream outputStream) throws IOException, InterruptedException { + return TarUtils.writeTarGz(path, outputStream, mSnapshotCompressionLevel); + } + + @Override + public long read(Path path, InputStream inputStream) throws IOException { + return TarUtils.readTarGz(path, inputStream); + } +} diff --git a/core/server/common/src/main/java/alluxio/util/TarUtils.java b/core/common/src/main/java/alluxio/util/compression/TarUtils.java similarity index 71% rename from core/server/common/src/main/java/alluxio/util/TarUtils.java rename to core/common/src/main/java/alluxio/util/compression/TarUtils.java index e1822c9bf80b..e75e965bfa70 100644 --- a/core/server/common/src/main/java/alluxio/util/TarUtils.java +++ b/core/common/src/main/java/alluxio/util/compression/TarUtils.java @@ -9,7 +9,7 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.util; +package alluxio.util.compression; import static java.util.stream.Collectors.toList; @@ -18,10 +18,12 @@ import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; +import org.apache.commons.compress.compressors.gzip.GzipParameters; import org.apache.commons.io.IOUtils; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -38,14 +40,20 @@ public final class TarUtils { * stream. 
* * @param dirPath the path to archive + * @param compressionLevel the compression level to use (0 for no compression, 9 for the most + * compression, or -1 for system default) * @param output the output stream to write the data to + * @return the number of bytes copied from the directory into the archive */ - public static void writeTarGz(Path dirPath, OutputStream output) + public static long writeTarGz(Path dirPath, OutputStream output, int compressionLevel) throws IOException, InterruptedException { - GzipCompressorOutputStream zipStream = new GzipCompressorOutputStream(output); + GzipParameters params = new GzipParameters(); + params.setCompressionLevel(compressionLevel); + GzipCompressorOutputStream zipStream = new GzipCompressorOutputStream(output, params); TarArchiveOutputStream archiveStream = new TarArchiveOutputStream(zipStream); archiveStream.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX); archiveStream.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX); + long totalBytesCopied = 0; try (final Stream stream = Files.walk(dirPath)) { for (Path subPath : stream.collect(toList())) { if (Thread.interrupted()) { @@ -55,8 +63,8 @@ public static void writeTarGz(Path dirPath, OutputStream output) TarArchiveEntry entry = new TarArchiveEntry(file, dirPath.relativize(subPath).toString()); archiveStream.putArchiveEntry(entry); if (file.isFile()) { - try (InputStream fileIn = Files.newInputStream(subPath)) { - IOUtils.copy(fileIn, archiveStream); + try (InputStream fileIn = new BufferedInputStream(Files.newInputStream(subPath))) { + totalBytesCopied += IOUtils.copyLarge(fileIn, archiveStream); } } archiveStream.closeArchiveEntry(); @@ -64,6 +72,7 @@ public static void writeTarGz(Path dirPath, OutputStream output) } archiveStream.finish(); zipStream.finish(); + return totalBytesCopied; } /** @@ -71,22 +80,26 @@ public static void writeTarGz(Path dirPath, OutputStream output) * * @param dirPath the path to write the archive to * @param input the input stream + * @return the number of bytes copied from the archive in the directory */ - public static void readTarGz(Path dirPath, InputStream input) throws IOException { + public static long readTarGz(Path dirPath, InputStream input) throws IOException { InputStream zipStream = new GzipCompressorInputStream(input); TarArchiveInputStream archiveStream = new TarArchiveInputStream(zipStream); TarArchiveEntry entry; + long totalBytesCopied = 0; while ((entry = (TarArchiveEntry) archiveStream.getNextEntry()) != null) { File outputFile = new File(dirPath.toFile(), entry.getName()); if (entry.isDirectory()) { outputFile.mkdirs(); } else { outputFile.getParentFile().mkdirs(); - try (FileOutputStream fileOut = new FileOutputStream(outputFile)) { - IOUtils.copy(archiveStream, fileOut); + try (OutputStream fileOut = + new BufferedOutputStream(Files.newOutputStream(outputFile.toPath()))) { + totalBytesCopied += IOUtils.copyLarge(archiveStream, fileOut); } } } + return totalBytesCopied; } private TarUtils() {} // Utils class diff --git a/core/common/src/main/java/alluxio/util/io/FileUtils.java b/core/common/src/main/java/alluxio/util/io/FileUtils.java index b53562c20e45..ca55fbcc873a 100644 --- a/core/common/src/main/java/alluxio/util/io/FileUtils.java +++ b/core/common/src/main/java/alluxio/util/io/FileUtils.java @@ -107,30 +107,6 @@ public static void changeLocalFileToFullPermission(String filePath) { changeLocalFilePermission(filePath, "rwxrwxrwx"); } - /** - * Gets local file's owner. 
- * - * @param filePath the file path - * @return the owner of the local file - */ - public static String getLocalFileOwner(String filePath) throws IOException { - PosixFileAttributes attr = - Files.readAttributes(Paths.get(filePath), PosixFileAttributes.class); - return attr.owner().getName(); - } - - /** - * Gets local file's group. - * - * @param filePath the file path - * @return the group of the local file - */ - public static String getLocalFileGroup(String filePath) throws IOException { - PosixFileAttributes attr = - Files.readAttributes(Paths.get(filePath), PosixFileAttributes.class); - return attr.group().getName(); - } - /** * Gets local file's permission mode. * @@ -293,6 +269,25 @@ public static void delete(String path) { } } + /** + * Deletes the file or directory, if it exists. + * + * @param path pathname string of file or directory + */ + public static void deleteIfExists(String path) { + try { + Files.deleteIfExists(Paths.get(path)); + } catch (java.nio.file.InvalidPathException e) { + throw new InvalidArgumentRuntimeException(e); + } catch (DirectoryNotEmptyException e) { + throw new FailedPreconditionRuntimeException(e); + } catch (SecurityException e) { + throw new PermissionDeniedRuntimeException(e); + } catch (IOException e) { + throw new UnknownRuntimeException(e); + } + } + /** * Deletes a file or a directory, recursively if it is a directory. * diff --git a/core/common/src/main/java/alluxio/util/io/PathUtils.java b/core/common/src/main/java/alluxio/util/io/PathUtils.java index c95240b063f1..17dcbd3e50e0 100644 --- a/core/common/src/main/java/alluxio/util/io/PathUtils.java +++ b/core/common/src/main/java/alluxio/util/io/PathUtils.java @@ -22,8 +22,10 @@ import com.google.common.base.Preconditions; import org.apache.commons.io.FilenameUtils; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.List; import java.util.UUID; import java.util.regex.Pattern; import javax.annotation.concurrent.ThreadSafe; @@ -191,6 +193,29 @@ public static String getParentCleaned(String cleanedPath) throws InvalidPathExce return parent; } + /** + * Gets the first level directory of the path. + * For example, + * + *
+   * <pre>
+   * {@code
+   * getFirstLevelDirectory("/a/xx/").equals("/a");
+   * getFirstLevelDirectory("/a/").equals("/a");
+   * }
+   * </pre>
+ * + * @param path the path + * @return the first level directory of the path + * @throws InvalidPathException if the path is the root or invalid + */ + public static String getFirstLevelDirectory(String path) throws InvalidPathException { + String[] paths = getPathComponents(path); + if (paths.length < 2) { + throw new InvalidPathException(path + " has no first level directory"); + } + return AlluxioURI.SEPARATOR + paths[1]; + } + /** * Join two path elements for ufs, separated by {@link AlluxioURI#SEPARATOR}. * @@ -321,9 +346,27 @@ public static String subtractPaths(String path, String prefix) throws InvalidPat * @throws InvalidPathException when the path or prefix is invalid */ public static boolean hasPrefix(String path, String prefix) throws InvalidPathException { + return hasPrefix(path, prefix, true); + } + + /** + * Checks whether the given path contains the given prefix. The comparison happens at a component + * granularity; for example, {@code hasPrefix(/dir/file, /dir)} should evaluate to true, while + * {@code hasPrefix(/dir/file, /d)} should evaluate to false. + * + * @param path a path + * @param prefix a prefix + * @param cleanPath if the paths should be cleaned + * @return whether the given path has the given prefix + * @throws InvalidPathException when the path or prefix is invalid + */ + public static boolean hasPrefix(String path, String prefix, boolean cleanPath) + throws InvalidPathException { // normalize path and prefix(e.g. "/a/b/../c" -> "/a/c", "/a/b/" --> "/a/b") - path = cleanPath(path); - prefix = cleanPath(prefix); + if (cleanPath) { + path = cleanPath(path); + prefix = cleanPath(prefix); + } if (prefix.equals("/")) { return true; @@ -420,5 +463,39 @@ public static String normalizePath(String path, String separator) { return path.endsWith(separator) ? path : path + separator; } + /** + * Adds a starting separator if it does not exist in path. + * + * @param path the path + * @param separator starting separator to add + * @return updated path with starting separator + */ + public static String normalizePathStart( + String path, String separator) { + return path.startsWith(separator) ? path : separator + path; + } + private PathUtils() {} // prevent instantiation + + /** + * Returns the list of possible mount points of the given path.
+ * + * "/a/b/c" => {"/a", "/a/b", "/a/b/c"} + * + * @param path the path to get the mount points of + * @return a list of paths + */ + public static List<String> getPossibleMountPoints(String path) throws InvalidPathException { + String basePath = cleanPath(path); + List<String> paths = new ArrayList<>(); + if ((basePath != null) && !basePath.equals(AlluxioURI.SEPARATOR)) { + paths.add(basePath); + String parent = getParent(path); + while (!parent.equals(AlluxioURI.SEPARATOR)) { + paths.add(0, parent); + parent = getParent(parent); + } + } + return paths; + } } diff --git a/core/common/src/main/java/alluxio/util/network/NetworkAddressUtils.java b/core/common/src/main/java/alluxio/util/network/NetworkAddressUtils.java index 704ffb5100bc..05fca8148ac6 100644 --- a/core/common/src/main/java/alluxio/util/network/NetworkAddressUtils.java +++ b/core/common/src/main/java/alluxio/util/network/NetworkAddressUtils.java @@ -370,7 +370,7 @@ public static InetSocketAddress getBindAddress(ServiceAttributeProvider service, AlluxioConfiguration conf) { int port = getPort(service, conf); assertValidPort(port); - return new InetSocketAddress(getBindHost(service, conf), getPort(service, conf)); + return new InetSocketAddress(getBindHost(service, conf), port); } /** diff --git a/core/common/src/main/java/alluxio/util/proto/BlockLocationUtils.java b/core/common/src/main/java/alluxio/util/proto/BlockLocationUtils.java new file mode 100644 index 000000000000..6105a26b9213 --- /dev/null +++ b/core/common/src/main/java/alluxio/util/proto/BlockLocationUtils.java @@ -0,0 +1,94 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util.proto; + +import alluxio.collections.IndexDefinition; +import alluxio.collections.IndexedSet; +import alluxio.proto.meta.Block.BlockLocation; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; + +import java.util.Set; + +/** + * A utility class to create cached gRPC block locations. + */ +public class BlockLocationUtils { + private static final IndexDefinition<BlockLocation> OBJECT_INDEX = + IndexDefinition.ofUnique((b) -> b); + + private static final IndexDefinition<BlockLocation> WORKER_ID_INDEX = + IndexDefinition.ofNonUnique(BlockLocation::getWorkerId); + + private static final IndexedSet<BlockLocation> BLOCK_LOCATION_CACHE = + new IndexedSet<>(OBJECT_INDEX, WORKER_ID_INDEX); + + private static final Set<String> VALID_MEDIUM_TYPE_VALUES = + Sets.newHashSet("MEM", "HDD", "SSD"); + + /** + * Get a shared gRPC block location object. If it does not exist, create and cache it. + * Because the valid values of tierAlias and mediumType are only MEM, SSD and HDD, + * the size of the cache map is limited.
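The following sketch shows how the new PathUtils helpers are meant to behave, with expected values taken straight from the javadocs above (the inline comments state the documented contracts, not verified output):

import alluxio.util.io.PathUtils;

public class PathUtilsExamples {
  public static void main(String[] args) throws Exception {
    System.out.println(PathUtils.getFirstLevelDirectory("/a/xx/")); // "/a"
    // Prefixes match per path component, not per character.
    System.out.println(PathUtils.hasPrefix("/dir/file", "/dir")); // true
    System.out.println(PathUtils.hasPrefix("/dir/file", "/d"));   // false
    // The 3-argument overload skips cleanPath() when inputs are pre-normalized.
    System.out.println(PathUtils.hasPrefix("/dir/file", "/dir", false)); // true
    System.out.println(PathUtils.getPossibleMountPoints("/a/b/c")); // [/a, /a/b, /a/b/c]
  }
}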
+ * + * @param workerId the worker id + * @param tierAlias the tier alias + * @param mediumType the medium type + * @return a shared block location object from the cache + */ + public static BlockLocation getCached( + long workerId, String tierAlias, String mediumType) { + BlockLocation location = BlockLocation + .newBuilder() + .setWorkerId(workerId) + .setTier(tierAlias) + .setMediumType(mediumType) + .build(); + return getCached(location); + } + + /** + * Get a shared grpc block location object. If it does not exist, create and cache it. + * Because the valid values of tierAlias and mediumType are only MEM, SSD and HDD, + * The size of the cache map is limited. + * + * @param blockLocation the block location to cache + * @return a shared block location object from the cache + */ + public static BlockLocation getCached(BlockLocation blockLocation) { + Preconditions.checkState(VALID_MEDIUM_TYPE_VALUES.contains(blockLocation.getTier()), + "TierAlias must be one of {MEM, HDD and SSD} but got %s", + blockLocation.getTier()); + Preconditions.checkState(VALID_MEDIUM_TYPE_VALUES.contains(blockLocation.getMediumType()), + "MediumType must be one of {MEM, HDD and SSD} but got %s", + blockLocation.getMediumType()); + BLOCK_LOCATION_CACHE.add(blockLocation); + return BLOCK_LOCATION_CACHE.getFirstByField(OBJECT_INDEX, blockLocation); + } + + /** + * Evict cache entries by worker id. + * @param workerId the worker id + */ + public static void evictByWorkerId(long workerId) { + BLOCK_LOCATION_CACHE.removeByField(WORKER_ID_INDEX, workerId); + } + + /** + * Gets the cached block location size. + * @return the cached block location size + */ + public static int getCachedBlockLocationSize() { + return BLOCK_LOCATION_CACHE.size(); + } +} diff --git a/core/common/src/main/java/alluxio/util/webui/UIMasterInfo.java b/core/common/src/main/java/alluxio/util/webui/UIMasterInfo.java deleted file mode 100644 index e391b4a3b53e..000000000000 --- a/core/common/src/main/java/alluxio/util/webui/UIMasterInfo.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.util.webui; - -import alluxio.util.CommonUtils; - -/** - * Displays information about a master in the UI. - */ -public class UIMasterInfo { - private final String mMasterAddress; - private final long mId; - private final long mLastUpdatedTimeMs; - - /** - * Creates a new instance of {@link UIMasterInfo}. - * - * @param masterAddress The master address - * @param id The master id - * @param lastUpdatedTimeMs The last heart beat in ms - */ - public UIMasterInfo(String masterAddress, long id, long lastUpdatedTimeMs) { - mMasterAddress = masterAddress; - mId = id; - mLastUpdatedTimeMs = lastUpdatedTimeMs; - } - - /** - * Gets master address. - * - * @return the master address - */ - public String getAddress() { - return mMasterAddress; - } - - /** - * Get id. - * - * @return the id - */ - public String getId() { - return Long.toString(mId); - } - - /** - * Get master last update time. 
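The cache above effectively interns proto BlockLocation objects: protobuf equality is value-based, so equal locations collapse to one shared instance. A usage sketch (the identity check reflects the design intent of the cache):

import alluxio.proto.meta.Block.BlockLocation;
import alluxio.util.proto.BlockLocationUtils;

public class BlockLocationInterning {
  public static void main(String[] args) {
    BlockLocation first = BlockLocationUtils.getCached(1L, "MEM", "MEM");
    BlockLocation second = BlockLocationUtils.getCached(1L, "MEM", "MEM");
    System.out.println(first == second); // expected: true, one shared instance
    System.out.println(BlockLocationUtils.getCachedBlockLocationSize()); // expected: 1
    // Removing a worker evicts every location indexed under its id.
    BlockLocationUtils.evictByWorkerId(1L);
    System.out.println(BlockLocationUtils.getCachedBlockLocationSize()); // expected: 0
  }
}

Because tier and medium are each restricted to MEM, SSD and HDD, the cache is bounded at roughly (number of workers) x 3 x 3 entries.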
- * - * @return the master last update time - */ - public String getLastUpdatedTime() { - return CommonUtils.convertMsToClockTime(mLastUpdatedTimeMs); - } -} diff --git a/core/common/src/main/java/alluxio/wire/AlluxioMasterInfo.java b/core/common/src/main/java/alluxio/wire/AlluxioMasterInfo.java index 213b292ab763..a8d0991330f2 100644 --- a/core/common/src/main/java/alluxio/wire/AlluxioMasterInfo.java +++ b/core/common/src/main/java/alluxio/wire/AlluxioMasterInfo.java @@ -34,6 +34,7 @@ public class AlluxioMasterInfo { private Capacity mUfsCapacity; private long mUptimeMs; private String mVersion; + private String mRevision; private List mWorkers; /** @@ -118,6 +119,15 @@ public String getVersion() { return mVersion; } + /** + * Gets revision. + * + * @return the revision + */ + public String getRevision() { + return mRevision; + } + /** * @return the list of workers */ @@ -224,6 +234,15 @@ public AlluxioMasterInfo setVersion(String version) { return this; } + /** + * @param revision the revision to use + * @return the Alluxio master information + */ + public AlluxioMasterInfo setRevision(String revision) { + mRevision = revision; + return this; + } + /** * @param workers the list of workers to use * @return the Alluxio master information @@ -253,6 +272,7 @@ public boolean equals(Object o) { && Objects.equal(mUfsCapacity, that.mUfsCapacity) && mUptimeMs == that.mUptimeMs && Objects.equal(mVersion, that.mVersion) + && Objects.equal(mRevision, that.mRevision) && Objects.equal(mWorkers, that.mWorkers); } @@ -261,7 +281,7 @@ public int hashCode() { return Objects .hashCode(mCapacity, mConfiguration, mLostWorkers, mMetrics, mMountPoints, mRpcAddress, mStartTimeMs, mTierCapacity, mUfsCapacity, mUptimeMs, - mVersion, mWorkers); + mVersion, mRevision, mWorkers); } @Override @@ -278,6 +298,7 @@ public String toString() { .add("ufs capacity", mUfsCapacity) .add("uptime", mUptimeMs) .add("version", mVersion) + .add("revision", mRevision) .add("workers", mWorkers) .toString(); } diff --git a/core/common/src/main/java/alluxio/wire/AlluxioWorkerInfo.java b/core/common/src/main/java/alluxio/wire/AlluxioWorkerInfo.java index e72ea04d4347..ab8348cab760 100644 --- a/core/common/src/main/java/alluxio/wire/AlluxioWorkerInfo.java +++ b/core/common/src/main/java/alluxio/wire/AlluxioWorkerInfo.java @@ -32,6 +32,7 @@ public class AlluxioWorkerInfo { private Map> mTierPaths; private long mUptimeMs; private String mVersion; + private String mRevision; /** * Creates a new instance of {@link AlluxioWorkerInfo}. @@ -101,6 +102,15 @@ public String getVersion() { return mVersion; } + /** + * Gets revision. 
+ * + * @return the revision + */ + public String getRevision() { + return mRevision; + } + /** * @param capacity the capacity to use * @return the Alluxio worker information @@ -182,6 +192,15 @@ public AlluxioWorkerInfo setVersion(String version) { return this; } + /** + * @param revision the revision to use + * @return the Alluxio worker information + */ + public AlluxioWorkerInfo setRevision(String revision) { + mRevision = revision; + return this; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -199,14 +218,15 @@ public boolean equals(Object o) { && Objects.equal(mTierCapacity, that.mTierCapacity) && Objects.equal(mTierPaths, that.mTierPaths) && mUptimeMs == that.mUptimeMs - && Objects.equal(mVersion, that.mVersion); + && Objects.equal(mVersion, that.mVersion) + && Objects.equal(mRevision, that.mRevision); } @Override public int hashCode() { return Objects .hashCode(mCapacity, mConfiguration, mMetrics, mRpcAddress, mStartTimeMs, mTierCapacity, - mTierPaths, mUptimeMs, mVersion); + mTierPaths, mUptimeMs, mVersion, mRevision); } @Override @@ -220,6 +240,8 @@ public String toString() { .add("tier capacity", mTierCapacity) .add("tier paths", mTierPaths) .add("uptime", mUptimeMs) - .add("version", mVersion).toString(); + .add("version", mVersion) + .add("revision", mRevision) + .toString(); } } diff --git a/core/common/src/main/java/alluxio/wire/ConfigHash.java b/core/common/src/main/java/alluxio/wire/ConfigHash.java index b41a60e09c46..cf4b4aafab2d 100644 --- a/core/common/src/main/java/alluxio/wire/ConfigHash.java +++ b/core/common/src/main/java/alluxio/wire/ConfigHash.java @@ -11,7 +11,9 @@ package alluxio.wire; +import alluxio.conf.PropertyKey; import alluxio.grpc.GetConfigHashPResponse; +import alluxio.util.CommonUtils; import com.google.common.base.Objects; import com.google.common.base.Preconditions; @@ -25,23 +27,29 @@ public class ConfigHash { private final String mClusterConfigHash; private final String mPathConfigHash; + private long mClusterConfigLastUpdateTime; + private long mPathConfigLastUpdateTime; /** * Constructs a new ConfigHash. 
* * @param clusterConfigHash cluster configuration hash, cannot be null * @param pathConfigHash path configuration hash, cannot be null + * @param clusterConfigLastUpdateTime the cluster config last update time + * @param pathConfigLastUpdateTime path config last update time */ - public ConfigHash(String clusterConfigHash, String pathConfigHash) { + public ConfigHash(String clusterConfigHash, String pathConfigHash, + long clusterConfigLastUpdateTime, long pathConfigLastUpdateTime) { Preconditions.checkNotNull(clusterConfigHash, "clusterConfigHash"); Preconditions.checkNotNull(pathConfigHash, "pathConfigHash"); mClusterConfigHash = clusterConfigHash; mPathConfigHash = pathConfigHash; + mClusterConfigLastUpdateTime = clusterConfigLastUpdateTime; + mPathConfigLastUpdateTime = pathConfigLastUpdateTime; } private ConfigHash(GetConfigHashPResponse response) { - mClusterConfigHash = response.getClusterConfigHash(); - mPathConfigHash = response.getPathConfigHash(); + this(response.getClusterConfigHash(), response.getPathConfigHash(), 0, 0); } /** @@ -80,6 +88,36 @@ public String getPathConfigHash() { return mPathConfigHash; } + /** + * @return cluster config last update time + */ + public long getClusterConfigLastUpdateTime() { + return mClusterConfigLastUpdateTime; + } + + /** + * @return path config last update time + */ + public long getPathConfigLastUpdateTime() { + return mPathConfigLastUpdateTime; + } + + /** + * @return cluster config last update time text + */ + public String getClusterConfigLastUpdateTimeText() { + return CommonUtils.convertMsToDate(mClusterConfigLastUpdateTime, + alluxio.conf.Configuration.getString(PropertyKey.USER_DATE_FORMAT_PATTERN)); + } + + /** + * @return path config last update time text + */ + public String getPathConfigLastUpdateTimeText() { + return CommonUtils.convertMsToDate(mPathConfigLastUpdateTime, + alluxio.conf.Configuration.getString(PropertyKey.USER_DATE_FORMAT_PATTERN)); + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/core/common/src/main/java/alluxio/wire/Configuration.java b/core/common/src/main/java/alluxio/wire/Configuration.java index 566af58b2b3a..be3f790e987f 100644 --- a/core/common/src/main/java/alluxio/wire/Configuration.java +++ b/core/common/src/main/java/alluxio/wire/Configuration.java @@ -40,6 +40,10 @@ public final class Configuration { private final String mClusterConfHash; /** Path configuration hash. */ private final String mPathConfHash; + /** Cluster configuration last update time. */ + private final long mClusterConfLastUpdateTime; + /** Path configuration last update time. */ + private final long mPathConfLastUpdateTime; /** * @return new configuration builder @@ -57,6 +61,8 @@ public static final class Builder { private Map> mPathConf = new HashMap<>(); private String mClusterConfHash; private String mPathConfHash; + private long mClusterConfLastUpdateTime; + private long mPathConfLastUpdateTime; /** * Adds a cluster level property. @@ -101,20 +107,42 @@ public void setPathConfHash(String hash) { mPathConfHash = hash; } + /** + * Sets cluster config last update time. + * + * @param lastUpdateTime the last update time + */ + public void setClusterConfLastUpdateTime(long lastUpdateTime) { + mClusterConfLastUpdateTime = lastUpdateTime; + } + + /** + * Sets path config last update time. 
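With the two timestamps added, a ConfigHash can be built and inspected as in this sketch. The hash strings are placeholders, and the *Text() getters format via USER_DATE_FORMAT_PATTERN, so they rely on the global Alluxio Configuration being initialized:

import alluxio.wire.ConfigHash;

public class ConfigHashExample {
  public static void main(String[] args) {
    long now = System.currentTimeMillis();
    ConfigHash hash = new ConfigHash("clusterHashV1", "pathHashV1", now, now);
    System.out.println(hash.getClusterConfigLastUpdateTime());     // epoch millis
    System.out.println(hash.getClusterConfigLastUpdateTimeText()); // formatted date
  }
}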
+ * + * @param lastUpdateTime the last update time + */ + public void setPathConfLastUpdateTime(long lastUpdateTime) { + mPathConfLastUpdateTime = lastUpdateTime; + } + /** * @return a newly constructed configuration */ public Configuration build() { - return new Configuration(mClusterConf, mPathConf, mClusterConfHash, mPathConfHash); + return new Configuration(mClusterConf, mPathConf, mClusterConfHash, mPathConfHash, + mClusterConfLastUpdateTime, mPathConfLastUpdateTime); } } private Configuration(List clusterConf, Map> pathConf, - String clusterConfHash, String pathConfHash) { + String clusterConfHash, String pathConfHash, + long clusterConfLastUpdateTime, long pathConfLastUpdateTime) { mClusterConf = clusterConf; mPathConf = pathConf; mClusterConfHash = clusterConfHash; mPathConfHash = pathConfHash; + mClusterConfLastUpdateTime = clusterConfLastUpdateTime; + mPathConfLastUpdateTime = pathConfLastUpdateTime; } private Configuration(GetConfigurationPResponse conf) { @@ -131,6 +159,8 @@ private Configuration(GetConfigurationPResponse conf) { mClusterConfHash = conf.getClusterConfigHash(); mPathConfHash = conf.getPathConfigHash(); + mClusterConfLastUpdateTime = conf.getClusterConfigLastUpdateTime(); + mPathConfLastUpdateTime = conf.getPathConfigLastUpdateTime(); } /** @@ -164,6 +194,8 @@ public GetConfigurationPResponse toProto() { if (mPathConfHash != null) { response.setPathConfigHash(mPathConfHash); } + response.setClusterConfigLastUpdateTime(mClusterConfLastUpdateTime); + response.setPathConfigLastUpdateTime(mPathConfLastUpdateTime); return response.build(); } @@ -194,4 +226,18 @@ public String getClusterConfHash() { public String getPathConfHash() { return mPathConfHash; } + + /** + * @return cluster conf last update time + */ + public long getClusterConfLastUpdateTime() { + return mClusterConfLastUpdateTime; + } + + /** + * @return path conf last update time + */ + public long getPathConfLastUpdateTime() { + return mPathConfLastUpdateTime; + } } diff --git a/core/common/src/main/java/alluxio/wire/FileInfo.java b/core/common/src/main/java/alluxio/wire/FileInfo.java index 3181f177ada5..9a613181a01f 100644 --- a/core/common/src/main/java/alluxio/wire/FileInfo.java +++ b/core/common/src/main/java/alluxio/wire/FileInfo.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; @@ -700,6 +701,10 @@ public String toString() { .add("ufsFingerprint", mUfsFingerprint) .add("acl", mAcl.toString()) .add("defaultAcl", mDefaultAcl.toString()) + .add("xattr", "[" + (mXAttr == null ? null : mXAttr.entrySet().stream() + .map(entry -> entry.getKey() + ":" + + (entry.getValue() == null ? null : new String(entry.getValue()))) + .collect(Collectors.joining(", "))) + "]") .toString(); } } diff --git a/core/common/src/main/java/alluxio/wire/MasterInfo.java b/core/common/src/main/java/alluxio/wire/MasterInfo.java index c3df43fe5d09..39485ec376a7 100644 --- a/core/common/src/main/java/alluxio/wire/MasterInfo.java +++ b/core/common/src/main/java/alluxio/wire/MasterInfo.java @@ -11,6 +11,8 @@ package alluxio.wire; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; import alluxio.util.CommonUtils; import com.google.common.base.MoreObjects; @@ -24,31 +26,33 @@ */ @NotThreadSafe public final class MasterInfo { + private static final String NONE = "N/A"; /** Master's address. */ private Address mAddress; /** The id of the master. 
*/ private long mId; - /** Master's last updated time in ms. */ - private long mLastUpdatedTimeMs; + /** Master's start time. */ + private String mStartTime = NONE; + /** Master's last gain primacy time. */ + private String mGainPrimacyTime = NONE; + /** Master's last lose primacy time. */ + private String mLosePrimacyTime = NONE; + /** Master's last updated time. */ + private String mLastUpdatedTime = NONE; + /** Master's version. */ + private String mVersion = NONE; + /** Master's revision. */ + private String mRevision = NONE; + /** Master's last checkpoint time. */ + private String mLastCheckpointTime = NONE; + /** Master's journal entries since last checkpoint. */ + private long mJournalEntriesSinceCheckpoint = 0; /** * Creates a new instance of {@link MasterInfo}. */ public MasterInfo() {} - /** - * Creates a new instance of {@link MasterInfo}. - * - * @param id the master id to use - * @param address the master address to use - * @param lastUpdatedTimeMs the master lastUpdatedTimeMs to use - */ - public MasterInfo(long id, Address address, long lastUpdatedTimeMs) { - mAddress = Preconditions.checkNotNull(address, "address"); - mId = id; - mLastUpdatedTimeMs = lastUpdatedTimeMs; - } - /** * Creates a new instance of {@link MasterInfo}. * @@ -58,7 +62,6 @@ public MasterInfo(long id, Address address, long lastUpdatedTimeMs) { public MasterInfo(long id, Address address) { mAddress = Preconditions.checkNotNull(address, "address"); mId = id; - mLastUpdatedTimeMs = System.currentTimeMillis(); } /** @@ -76,10 +79,59 @@ public long getId() { } /** - * @return the last updated time of the master in ms + * @return the last updated time of the master + */ + public String getLastUpdatedTime() { + return mLastUpdatedTime; + } + + /** + * @return the start time of the master */ - public long getLastUpdatedTimeMs() { - return mLastUpdatedTimeMs; + public String getStartTime() { + return mStartTime; + } + + /** + * @return the last gain primacy time of the master + */ + public String getGainPrimacyTime() { + return mGainPrimacyTime; + } + + /** + * @return the last lose primacy time of the master + */ + public String getLosePrimacyTime() { + return mLosePrimacyTime; + } + + /** + * @return the version of the master + */ + public String getVersion() { + return mVersion; + } + + /** + * @return the revision of the master + */ + public String getRevision() { + return mRevision; + } + + /** + * @return the last checkpoint time + */ + public String getLastCheckpointTime() { + return mLastCheckpointTime; + } + + /** + * @return journal entries since last checkpoint + */ + public long getJournalEntriesSinceCheckpoint() { + return mJournalEntriesSinceCheckpoint; } /** @@ -101,26 +153,128 @@ public MasterInfo setId(long id) { } /** - * @param lastUpdatedTimeMs the last update time in ms + * @param lastUpdatedTime the last update time * @return the master information */ - public MasterInfo setLastUpdatedTimeMs(long lastUpdatedTimeMs) { - mLastUpdatedTimeMs = lastUpdatedTimeMs; + public MasterInfo setLastUpdatedTime(String lastUpdatedTime) { + mLastUpdatedTime = lastUpdatedTime; return this; } - @Override - public String toString() { - return MoreObjects.toStringHelper(this).add("id", mId).add("address", mAddress) - .add("lastUpdatedTime", CommonUtils.convertMsToClockTime(mLastUpdatedTimeMs)) - .toString(); + /** + * @param lastUpdatedTime the last update time in ms + * @return the master information + */ + public MasterInfo setLastUpdatedTimeMs(long lastUpdatedTime) { + return 
this.setLastUpdatedTime(convertMsToDate(lastUpdatedTime)); + } + + /** + * @param startTime the start time of the master + * @return the master information + */ + public MasterInfo setStartTime(String startTime) { + mStartTime = startTime; + return this; + } + + /** + * @param startTime the start time of the master in ms + * @return the master information + */ + public MasterInfo setStartTimeMs(long startTime) { + return this.setStartTime(convertMsToDate(startTime)); } /** - * Updates the last updated time of the master (in milliseconds). + * @param gainPrimacyTime the last gain primacy time of the master + * @return the master information + */ + public MasterInfo setGainPrimacyTime(String gainPrimacyTime) { + mGainPrimacyTime = gainPrimacyTime; + return this; + } + + /** + * @param gainPrimacyTimeMs the last gain primacy time of the master in ms + * @return the master information + */ + public MasterInfo setGainPrimacyTimeMs(long gainPrimacyTimeMs) { + return this.setGainPrimacyTime(convertMsToDate(gainPrimacyTimeMs)); + } + + /** + * @param losePrimacyTime the last lose primacy time of the master + * @return the master information + */ + public MasterInfo setLosePrimacyTime(String losePrimacyTime) { + mLosePrimacyTime = losePrimacyTime; + return this; + } + + /** + * @param losePrimacyTimeMs the last lose primacy time of the master in ms + * @return the master information + */ + public MasterInfo setLosePrimacyTimeMs(long losePrimacyTimeMs) { + return this.setLosePrimacyTime(convertMsToDate(losePrimacyTimeMs)); + } + + /** + * @param version the version of the master + * @return the master information */ - public void updateLastUpdatedTimeMs() { - mLastUpdatedTimeMs = System.currentTimeMillis(); + public MasterInfo setVersion(String version) { + mVersion = version; + return this; + } + + /** + * @param revision the revision of the master + * @return the master information + */ + public MasterInfo setRevision(String revision) { + mRevision = revision; + return this; + } + + /** + * @param lastCheckpointTime the last checkpoint time + * @return the master information + */ + public MasterInfo setLastCheckpointTime(String lastCheckpointTime) { + mLastCheckpointTime = lastCheckpointTime; + return this; + } + + /** + * @param lastCheckpointTime the last checkpoint time in ms + * @return the master information + */ + public MasterInfo setLastCheckpointTimeMs(long lastCheckpointTime) { + return this.setLastCheckpointTime(convertMsToDate(lastCheckpointTime)); + } + + /** + * @param journalEntriesSinceCheckpoint journal entries since last checkpoint + * @return the master information + */ + public MasterInfo setJournalEntriesSinceCheckpoint(long journalEntriesSinceCheckpoint) { + mJournalEntriesSinceCheckpoint = journalEntriesSinceCheckpoint; + return this; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this).add("id", mId).add("address", mAddress) + .add("lastUpdatedTime", mLastUpdatedTime) + .add("startTime", mStartTime) + .add("gainPrimacyTime", mGainPrimacyTime) + .add("losePrimacyTime", mLosePrimacyTime) + .add("lastCheckpointTime", mLastCheckpointTime) + .add("journalEntriesSinceCheckpoint", mJournalEntriesSinceCheckpoint) + .add("version", mVersion) + .add("revision", mRevision).toString(); } @Override @@ -133,11 +287,28 @@ public boolean equals(Object o) { } MasterInfo that = (MasterInfo) o; return mId == that.mId && Objects.equal(mAddress, that.mAddress) - && mLastUpdatedTimeMs == that.mLastUpdatedTimeMs; + && 
mLastUpdatedTime.equals(that.mLastUpdatedTime) + && mStartTime.equals(that.mStartTime) + && mGainPrimacyTime.equals(that.mGainPrimacyTime) + && mLosePrimacyTime.equals(that.mLosePrimacyTime) + && mLastCheckpointTime.equals(that.mLastCheckpointTime) + && mJournalEntriesSinceCheckpoint == that.mJournalEntriesSinceCheckpoint + && mVersion.equals(that.mVersion) + && mRevision.equals(that.mRevision); } @Override public int hashCode() { - return Objects.hashCode(mId, mAddress, mLastUpdatedTimeMs); + return Objects.hashCode(mId, mAddress, mLastUpdatedTime, mStartTime, mGainPrimacyTime, + mLosePrimacyTime, mLastCheckpointTime, mJournalEntriesSinceCheckpoint, + mVersion, mRevision); + } + + private static String convertMsToDate(long timeMs) { + if (timeMs <= 0) { + return NONE; + } + return CommonUtils.convertMsToDate(timeMs, + Configuration.getString(PropertyKey.USER_DATE_FORMAT_PATTERN)); } } diff --git a/core/common/src/main/java/alluxio/wire/MasterWebUIConfiguration.java b/core/common/src/main/java/alluxio/wire/MasterWebUIConfiguration.java index 2f91532d0dbd..ce12ff38800b 100644 --- a/core/common/src/main/java/alluxio/wire/MasterWebUIConfiguration.java +++ b/core/common/src/main/java/alluxio/wire/MasterWebUIConfiguration.java @@ -28,6 +28,7 @@ public final class MasterWebUIConfiguration implements Serializable { private List mWhitelist; private TreeSet> mConfiguration; + private ConfigHash mConfigHash; /** * Creates a new instance of {@link MasterWebUIConfiguration}. @@ -76,6 +77,23 @@ public MasterWebUIConfiguration setWhitelist(List whitelist) { return this; } + /** + * @return cluster config hash + */ + public ConfigHash getConfigHash() { + return mConfigHash; + } + + /** + * Sets config hash. + * @param configHash the config hash + * @return the configuration + */ + public MasterWebUIConfiguration setConfigHash(ConfigHash configHash) { + mConfigHash = configHash; + return this; + } + @Override public String toString() { return MoreObjects.toStringHelper(this).add("configuration", mConfiguration) diff --git a/core/common/src/main/java/alluxio/wire/MasterWebUIMasters.java b/core/common/src/main/java/alluxio/wire/MasterWebUIMasters.java index fac2ada17a25..4e88b2e96523 100644 --- a/core/common/src/main/java/alluxio/wire/MasterWebUIMasters.java +++ b/core/common/src/main/java/alluxio/wire/MasterWebUIMasters.java @@ -24,9 +24,9 @@ public final class MasterWebUIMasters implements Serializable { private static final long serialVersionUID = -2709466215687255197L; private boolean mDebug; - private MasterInfo[] mFailedMasterInfos; - private MasterInfo[] mNormalMasterInfos; - private MasterInfo mLeaderMasterInfo; + private MasterInfo[] mLostMasterInfos; + private MasterInfo[] mStandbyMasterInfos; + private MasterInfo mPrimaryMasterInfo; /** * Creates a new instance of {@link MasterWebUIMasters}. @@ -44,37 +44,37 @@ public boolean getDebug() { } /** - * Get failed master infos master info [ ]. + * Get info of lost masters. * - * @return the master info [ ] + * @return an array of lost {@link MasterInfo} */ - public MasterInfo[] getFailedMasterInfos() { - return mFailedMasterInfos; + public MasterInfo[] getLostMasterInfos() { + return mLostMasterInfos; } /** - * Get leader master info master info. + * Get info of standby masters. 
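Stepping back to the MasterInfo rework above: the time fields are now pre-formatted strings defaulting to "N/A", and each *Ms setter converts epoch millis on the way in. A minimal sketch, assuming alluxio.wire.Address offers a (host, port) constructor; the version and revision values are illustrative:

import alluxio.wire.Address;
import alluxio.wire.MasterInfo;

public class MasterInfoExample {
  public static void main(String[] args) {
    MasterInfo info = new MasterInfo(1L, new Address("master-1", 19998))
        .setStartTimeMs(System.currentTimeMillis())
        .setGainPrimacyTimeMs(System.currentTimeMillis())
        .setVersion("2.9.0")
        .setRevision("abc1234");
    System.out.println(info.getLosePrimacyTime()); // "N/A", never set
    System.out.println(info.getStartTime());       // formatted date string
  }
}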
* - * @return the master info + * @return an array of standby {@link MasterInfo} */ - public MasterInfo[] getNormalMasterInfos() { - return mNormalMasterInfos; + public MasterInfo[] getStandbyMasterInfos() { + return mStandbyMasterInfos; } /** - * Get normal master infos master info [ ]. + * Get info of the primary master. * - * @return the master info [ ] + * @return the primary {@link MasterInfo} */ - public MasterInfo getLeaderMasterInfo() { - return mLeaderMasterInfo; + public MasterInfo getPrimaryMasterInfo() { + return mPrimaryMasterInfo; } /** * Sets debug. * * @param debug the debug - * @return the debug master infos + * @return the {@link MasterWebUIMasters} instance */ public MasterWebUIMasters setDebug(boolean debug) { mDebug = debug; @@ -82,43 +82,43 @@ public MasterWebUIMasters setDebug(boolean debug) { } /** - * Sets failed master infos. + * Sets lost master infos. * - * @param failedMasterInfos the failed master infos - * @return the failed master infos + * @param lostMasterInfos an array of lost {@link MasterInfo} + * @return the {@link MasterWebUIMasters} instance */ - public MasterWebUIMasters setFailedMasterInfos(MasterInfo[] failedMasterInfos) { - mFailedMasterInfos = failedMasterInfos.clone(); + public MasterWebUIMasters setLostMasterInfos(MasterInfo[] lostMasterInfos) { + mLostMasterInfos = lostMasterInfos.clone(); return this; } /** - * Sets normal master infos. + * Sets standby master infos. * - * @param normalMasterInfos the normal master infos - * @return the normal master infos + * @param standbyMasterInfos an array of standby {@link MasterInfo} + * @return the {@link MasterWebUIMasters} instance */ - public MasterWebUIMasters setNormalMasterInfos(MasterInfo[] normalMasterInfos) { - mNormalMasterInfos = normalMasterInfos.clone(); + public MasterWebUIMasters setStandbyMasterInfos(MasterInfo[] standbyMasterInfos) { + mStandbyMasterInfos = standbyMasterInfos.clone(); return this; } /** - * Sets leader master info. + * Sets primary master info. * - * @param leaderMasterInfo the normal master info - * @return the leader master info + * @param primaryMasterInfo the primary {@link MasterInfo} + * @return the {@link MasterWebUIMasters} instance */ - public MasterWebUIMasters setLeaderMasterInfo(MasterInfo leaderMasterInfo) { - mLeaderMasterInfo = leaderMasterInfo; + public MasterWebUIMasters setPrimaryMasterInfo(MasterInfo primaryMasterInfo) { + mPrimaryMasterInfo = primaryMasterInfo; return this; } @Override public String toString() { return MoreObjects.toStringHelper(this).add("debug", mDebug) - .add("failedMasterInfos", mFailedMasterInfos) - .add("normalMasterInfos", mNormalMasterInfos) - .add("leaderMasterInfo", mLeaderMasterInfo).toString(); + .add("lostMasterInfos", mLostMasterInfos) + .add("standbyMasterInfos", mStandbyMasterInfos) + .add("primaryMasterInfo", mPrimaryMasterInfo).toString(); } } diff --git a/core/common/src/main/java/alluxio/wire/MasterWebUIOverview.java b/core/common/src/main/java/alluxio/wire/MasterWebUIOverview.java index 2ab5f40bef29..28861a8bf570 100644 --- a/core/common/src/main/java/alluxio/wire/MasterWebUIOverview.java +++ b/core/common/src/main/java/alluxio/wire/MasterWebUIOverview.java @@ -53,8 +53,10 @@ public final class MasterWebUIOverview implements Serializable { private String mUsedCapacity; private String mUniqueBlockCount; private String mVersion; + private String mRevision; private String mMasterRole; private String mLeaderId; + private String mSystemStatus; /** * Creates a new instance of {@link MasterWebUIOverview}. 
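With the failed/normal/leader naming replaced by lost/standby/primary, populating the masters page looks roughly like this sketch (the no-arg constructor and Address usage are assumptions, not shown in this hunk):

import alluxio.wire.Address;
import alluxio.wire.MasterInfo;
import alluxio.wire.MasterWebUIMasters;

public class MastersPageExample {
  public static void main(String[] args) {
    MasterWebUIMasters page = new MasterWebUIMasters()
        .setDebug(false)
        .setPrimaryMasterInfo(new MasterInfo(1L, new Address("master-1", 19998)))
        .setStandbyMasterInfos(new MasterInfo[] {new MasterInfo(2L, new Address("master-2", 19998))})
        .setLostMasterInfos(new MasterInfo[0]);
    System.out.println(page);
  }
}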
@@ -238,6 +240,15 @@ public String getVersion() { return mVersion; } + /** + * Gets revision. + * + * @return the revision + */ + public String getRevision() { + return mRevision; + } + /** * Gets config check warn num. * @@ -292,11 +303,20 @@ public String getLeaderId() { return mLeaderId; } + /** + * Gets system status. + * + * @return the system status + */ + public String getSystemStatus() { + return mSystemStatus; + } + /** * Sets capacity. * * @param capacity the capacity - * @return capacity + * @return the updated {@link MasterWebUIOverview} object */ public MasterWebUIOverview setCapacity(String capacity) { mCapacity = capacity; @@ -318,7 +338,7 @@ public MasterWebUIOverview setClusterId(String clusterId) { * Sets config check error num. * * @param configCheckErrorNum the config check error num - * @return config check error num + * @return the updated {@link MasterWebUIOverview} object */ public MasterWebUIOverview setConfigCheckErrorNum(int configCheckErrorNum) { mConfigCheckErrorNum = configCheckErrorNum; @@ -329,7 +349,7 @@ public MasterWebUIOverview setConfigCheckErrorNum(int configCheckErrorNum) { * Sets config check errors. * * @param configCheckErrors the config check errors - * @return config check errors + * @return the updated {@link MasterWebUIOverview} object */ public MasterWebUIOverview setConfigCheckErrors( Map> configCheckErrors) { @@ -341,7 +361,7 @@ public MasterWebUIOverview setConfigCheckErrors( * Sets config check status. * * @param configCheckStatus the config check status - * @return config check status + * @return the updated {@link MasterWebUIOverview} object */ public MasterWebUIOverview setConfigCheckStatus( ConfigStatus configCheckStatus) { @@ -353,7 +373,7 @@ public MasterWebUIOverview setConfigCheckStatus( * Sets config check warns. * * @param configCheckWarns the config check warns - * @return config check warns + * @return the updated {@link MasterWebUIOverview} object */ public MasterWebUIOverview setConfigCheckWarns( Map> configCheckWarns) { @@ -365,7 +385,7 @@ public MasterWebUIOverview setConfigCheckWarns( * Sets debug. * * @param debug the debug - * @return debug + * @return the updated {@link MasterWebUIOverview} object */ public MasterWebUIOverview setDebug(boolean debug) { mDebug = debug; @@ -376,7 +396,7 @@ public MasterWebUIOverview setDebug(boolean debug) { * Sets disk capacity. * * @param diskCapacity the disk capacity - * @return disk capacity + * @return the updated {@link MasterWebUIOverview} object */ public MasterWebUIOverview setDiskCapacity(String diskCapacity) { mDiskCapacity = diskCapacity; @@ -387,7 +407,7 @@ public MasterWebUIOverview setDiskCapacity(String diskCapacity) { * Sets disk free capacity. * * @param diskFreeCapacity the disk free capacity - * @return disk free capacity + * @return the updated {@link MasterWebUIOverview} object */ public MasterWebUIOverview setDiskFreeCapacity(String diskFreeCapacity) { mDiskFreeCapacity = diskFreeCapacity; @@ -398,7 +418,7 @@ public MasterWebUIOverview setDiskFreeCapacity(String diskFreeCapacity) { * Sets disk used capacity. * * @param diskUsedCapacity the disk used capacity - * @return disk used capacity + * @return the updated {@link MasterWebUIOverview} object */ public MasterWebUIOverview setDiskUsedCapacity(String diskUsedCapacity) { mDiskUsedCapacity = diskUsedCapacity; @@ -409,7 +429,7 @@ public MasterWebUIOverview setDiskUsedCapacity(String diskUsedCapacity) { * Sets free capacity. 
* * @param freeCapacity the free capacity - * @return free capacity + * @return the updated {@link MasterWebUIOverview} object */ public MasterWebUIOverview setFreeCapacity(String freeCapacity) { mFreeCapacity = freeCapacity; @@ -438,7 +458,7 @@ public MasterWebUIOverview setJournalDiskWarnings(List journalDiskWarnin * Sets live worker nodes. * * @param liveWorkerNodes the live worker nodes - * @return live worker nodes + * @return the updated {@link MasterWebUIOverview} object */ public MasterWebUIOverview setLiveWorkerNodes(String liveWorkerNodes) { mLiveWorkerNodes = liveWorkerNodes; @@ -449,7 +469,7 @@ public MasterWebUIOverview setLiveWorkerNodes(String liveWorkerNodes) { * Sets master node address. * * @param masterNodeAddress the master node address - * @return master node address + * @return the master webui overview */ public MasterWebUIOverview setMasterNodeAddress(String masterNodeAddress) { mMasterNodeAddress = masterNodeAddress; @@ -460,7 +480,7 @@ public MasterWebUIOverview setMasterNodeAddress(String masterNodeAddress) { * Sets start time. * * @param startTime the start time - * @return start time + * @return the master webui overview */ public MasterWebUIOverview setStartTime(String startTime) { mStartTime = startTime; @@ -471,7 +491,7 @@ public MasterWebUIOverview setStartTime(String startTime) { * Sets storage tier infos. * * @param storageTierInfos the storage tier infos - * @return storage tier infos + * @return the master webui overview */ public MasterWebUIOverview setStorageTierInfos(List storageTierInfos) { mStorageTierInfos = storageTierInfos; @@ -482,7 +502,7 @@ public MasterWebUIOverview setStorageTierInfos(List storageTier * Sets uptime. * * @param uptime the uptime - * @return uptime + * @return the master webui overview */ public MasterWebUIOverview setUptime(String uptime) { mUptime = uptime; @@ -493,7 +513,7 @@ public MasterWebUIOverview setUptime(String uptime) { * Sets used capacity. * * @param usedCapacity the used capacity - * @return used capacity + * @return the master webui overview */ public MasterWebUIOverview setUsedCapacity(String usedCapacity) { mUsedCapacity = usedCapacity; @@ -504,18 +524,29 @@ public MasterWebUIOverview setUsedCapacity(String usedCapacity) { * Sets version. * * @param version the version - * @return version + * @return the master webui overview */ public MasterWebUIOverview setVersion(String version) { mVersion = version; return this; } + /** + * Sets revision. + * + * @param revision the revision + * @return the master webui overview + */ + public MasterWebUIOverview setRevision(String revision) { + mRevision = revision; + return this; + } + /** * Sets config check warn num. * * @param configCheckWarnNum the config check warn num - * @return config check warn num + * @return the master webui overview */ public MasterWebUIOverview setConfigCheckWarnNum(int configCheckWarnNum) { mConfigCheckWarnNum = configCheckWarnNum; @@ -526,7 +557,7 @@ public MasterWebUIOverview setConfigCheckWarnNum(int configCheckWarnNum) { * Sets unique block count. * * @param uniqueBlockCount the unique block count - * @return unique block count + * @return the master webui overview */ public MasterWebUIOverview setUniqueBlockCount(String uniqueBlockCount) { mUniqueBlockCount = uniqueBlockCount; @@ -537,7 +568,7 @@ public MasterWebUIOverview setUniqueBlockCount(String uniqueBlockCount) { * Sets total path. 
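All of these setters return this, so a handler can assemble the overview page fluently; the values below, including the system status text, are illustrative only:

import alluxio.wire.MasterWebUIOverview;

public class OverviewPageExample {
  public static void main(String[] args) {
    MasterWebUIOverview overview = new MasterWebUIOverview()
        .setVersion("2.9.0")
        .setRevision("abc1234")
        .setSystemStatus("HEALTHY")
        .setMasterRole("LEADER")
        .setUptime("1d 2h 3m");
    System.out.println(overview);
  }
}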
* * @param totalPath the total path - * @return total path + * @return the master webui overview */ public MasterWebUIOverview setTotalPath(String totalPath) { mTotalPath = totalPath; @@ -548,7 +579,7 @@ public MasterWebUIOverview setTotalPath(String totalPath) { * Sets replica block count. * * @param replicaBlockCount the replica block count - * @return replica block count + * @return the master webui overview */ public MasterWebUIOverview setReplicaBlockCount(String replicaBlockCount) { mReplicaBlockCount = replicaBlockCount; @@ -559,7 +590,7 @@ public MasterWebUIOverview setReplicaBlockCount(String replicaBlockCount) { * Sets the master role name. * * @param roleName the master role name - * @return master role name + * @return the master webui overview */ public MasterWebUIOverview setMasterRole(String roleName) { mMasterRole = roleName; @@ -570,13 +601,24 @@ public MasterWebUIOverview setMasterRole(String roleName) { * Sets the leader id. * * @param leaderId the leader id - * @return leader id + * @return the master webui overview */ public MasterWebUIOverview setLeaderId(String leaderId) { mLeaderId = leaderId; return this; } + /** + * Sets the system status. + * + * @param systemStatus the system status + * @return the master webui overview + */ + public MasterWebUIOverview setSystemStatus(String systemStatus) { + mSystemStatus = systemStatus; + return this; + } + @Override public String toString() { return MoreObjects.toStringHelper(this).add("capacity", mCapacity) @@ -592,8 +634,10 @@ public String toString() { .add("replicaBlockCount", mReplicaBlockCount) .add("startTime", mStartTime).add("storageTierInfos", mStorageTierInfos) .add("totalPath", mTotalPath).add("uniqueBlockCount", mUniqueBlockCount) - .add("uptime", mUptime).add("usedCapacity", mUsedCapacity).add("version", mVersion) + .add("uptime", mUptime).add("usedCapacity", mUsedCapacity) + .add("version", mVersion).add("revision", mRevision) .add("leaderId", mLeaderId) + .add("systemStatus", mSystemStatus) .add("masterRole", mMasterRole) .toString(); } diff --git a/core/common/src/main/java/alluxio/wire/WorkerInfo.java b/core/common/src/main/java/alluxio/wire/WorkerInfo.java index 14eb64af247d..42872ade9ebe 100644 --- a/core/common/src/main/java/alluxio/wire/WorkerInfo.java +++ b/core/common/src/main/java/alluxio/wire/WorkerInfo.java @@ -41,6 +41,7 @@ public final class WorkerInfo implements Serializable { private long mBlockCount; private String mVersion = ""; private String mRevision = ""; + private int mNumVCpu; /** * @return the worker id @@ -130,6 +131,14 @@ public String getRevision() { return mRevision; } + /** + * @return the number of available processors on the worker + */ + @ApiModelProperty(value = "Number of available processors on the worker") + public int getNumVCpu() { + return mNumVCpu; + } + /** * @param id the worker id to use * @return the worker information */ @@ -231,6 +240,15 @@ public WorkerInfo setRevision(String revision) { return this; } + /** + * @param numVCpu the number of available processors on the worker + * @return the worker information + */ + public WorkerInfo setNumVCpu(int numVCpu) { + mNumVCpu = numVCpu; + return this; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -246,7 +264,8 @@ public boolean equals(Object o) { && mStartTimeMs == that.mStartTimeMs && Objects.equal(mCapacityBytesOnTiers, that.mCapacityBytesOnTiers) && Objects.equal(mUsedBytesOnTiers, that.mUsedBytesOnTiers) - && mVersion.equals(that.mVersion) && mRevision.equals(that.mRevision); + &&
mVersion.equals(that.mVersion) && mRevision.equals(that.mRevision) + && mNumVCpu == that.mNumVCpu; } /** @@ -287,7 +306,7 @@ public LastContactSecComparator() {} @Override public int hashCode() { return Objects.hashCode(mId, mAddress, mLastContactSec, mState, mCapacityBytes, mUsedBytes, - mStartTimeMs, mCapacityBytesOnTiers, mUsedBytesOnTiers, mVersion, mRevision); + mStartTimeMs, mCapacityBytesOnTiers, mUsedBytesOnTiers, mVersion, mRevision, mNumVCpu); } @Override @@ -297,6 +316,7 @@ public String toString() { .add("capacityBytes", mCapacityBytes).add("usedBytes", mUsedBytes) .add("startTimeMs", mStartTimeMs).add("capacityBytesOnTiers", mCapacityBytesOnTiers) .add("usedBytesOnTiers", mUsedBytesOnTiers) - .add("version", mVersion).add("revision", mRevision).toString(); + .add("version", mVersion).add("revision", mRevision) + .add("numVCpu", mNumVCpu).toString(); } } diff --git a/core/common/src/main/java/alluxio/wire/WorkerNetAddress.java b/core/common/src/main/java/alluxio/wire/WorkerNetAddress.java index ed71d37e7652..7a2a9bedbc98 100644 --- a/core/common/src/main/java/alluxio/wire/WorkerNetAddress.java +++ b/core/common/src/main/java/alluxio/wire/WorkerNetAddress.java @@ -32,6 +32,8 @@ public final class WorkerNetAddress implements Serializable { private static final long serialVersionUID = 0L; + public static final WorkerNetAddress DUMMY = new WorkerNetAddress(); + private String mHost = ""; private String mContainerHost = ""; private int mRpcPort; diff --git a/core/common/src/main/java/alluxio/wire/WorkerWebUIConfiguration.java b/core/common/src/main/java/alluxio/wire/WorkerWebUIConfiguration.java index d1d2435c229b..8411e98354da 100644 --- a/core/common/src/main/java/alluxio/wire/WorkerWebUIConfiguration.java +++ b/core/common/src/main/java/alluxio/wire/WorkerWebUIConfiguration.java @@ -28,6 +28,10 @@ public final class WorkerWebUIConfiguration implements Serializable { private List mWhitelist; private TreeSet> mConfiguration; + private String mClusterConfigHash; + private String mPathConfigHash; + private String mClusterConfigLastUpdateTime; + private String mPathConfigLastUpdateTime; /** * Creates a new instance of {@link WorkerWebUIConfiguration}. @@ -76,6 +80,78 @@ public WorkerWebUIConfiguration setWhitelist(List whitelist) { return this; } + /** + * @return cluster config hash + */ + public String getClusterConfigHash() { + return mClusterConfigHash; + } + + /** + * Sets cluster config hash. + * @param clusterConfigHash the cluster config hash + * @return the configuration + */ + public WorkerWebUIConfiguration setClusterConfigHash(String clusterConfigHash) { + mClusterConfigHash = clusterConfigHash; + return this; + } + + /** + * @return path config hash + */ + public String getPathConfigHash() { + return mPathConfigHash; + } + + /** + * Sets path config hash. + * + * @param pathConfigHash the path config hash + * @return the configuration + */ + public WorkerWebUIConfiguration setPathConfigHash(String pathConfigHash) { + mPathConfigHash = pathConfigHash; + return this; + } + + /** + * @return cluster config last update time + */ + public String getClusterConfigLastUpdateTime() { + return mClusterConfigLastUpdateTime; + } + + /** + * Sets cluster config last update time. 
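Back on WorkerInfo: the new numVCpu field would typically be populated from the JVM's view of the host, as in this sketch (default constructor assumed; version and revision values are illustrative):

import alluxio.wire.WorkerInfo;

public class WorkerInfoExample {
  public static void main(String[] args) {
    WorkerInfo worker = new WorkerInfo()
        .setId(42L)
        .setVersion("2.9.0")
        .setRevision("abc1234")
        .setNumVCpu(Runtime.getRuntime().availableProcessors());
    System.out.println(worker.getNumVCpu());
  }
}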
+ * + * @param clusterConfigLastUpdateTime the cluster config last update time + * @return the configuration + */ + public WorkerWebUIConfiguration setClusterConfigLastUpdateTime( + String clusterConfigLastUpdateTime) { + mClusterConfigLastUpdateTime = clusterConfigLastUpdateTime; + return this; + } + + /** + * @return path config last update time + */ + public String getPathConfigLastUpdateTime() { + return mPathConfigLastUpdateTime; + } + + /** + * Sets the path config last update time. + * @param pathConfigLastUpdateTime path config last update time + * @return the configuration + */ + public WorkerWebUIConfiguration setPathConfigLastUpdateTime( + String pathConfigLastUpdateTime) { + mPathConfigLastUpdateTime = pathConfigLastUpdateTime; + return this; + } + @Override public String toString() { return MoreObjects.toStringHelper(this).add("configuration", mConfiguration) diff --git a/core/common/src/main/java/alluxio/wire/WorkerWebUILogs.java b/core/common/src/main/java/alluxio/wire/WorkerWebUILogs.java index 7120ab1ca794..e31258128f70 100644 --- a/core/common/src/main/java/alluxio/wire/WorkerWebUILogs.java +++ b/core/common/src/main/java/alluxio/wire/WorkerWebUILogs.java @@ -203,7 +203,7 @@ public WorkerWebUILogs setViewingOffset(long viewingOffset) { @Override public String toString() { - return MoreObjects.toStringHelper(this).add("currentPath", mCurrentPath).add("cebug", mDebug) + return MoreObjects.toStringHelper(this).add("currentPath", mCurrentPath).add("debug", mDebug) .add("fatalError", mFatalError).add("fileData", mFileData).add("fileInfos", mFileInfos) .add("invalidPathError", mInvalidPathError).add("nTotalFile", mNTotalFile) .add("viewingOffset", mViewingOffset).toString(); diff --git a/core/common/src/main/java/alluxio/wire/WorkerWebUIOperations.java b/core/common/src/main/java/alluxio/wire/WorkerWebUIOperations.java new file mode 100644 index 000000000000..daef90540cba --- /dev/null +++ b/core/common/src/main/java/alluxio/wire/WorkerWebUIOperations.java @@ -0,0 +1,78 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.wire; + +import com.google.common.base.MoreObjects; + +import java.io.Serializable; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * Alluxio worker WebUI operations information. + */ +@NotThreadSafe +public final class WorkerWebUIOperations implements Serializable { + private static final long serialVersionUID = 5444572986825500733L; + + private long mOperationCount; + private long mRpcQueueLength; + + /** + * Creates a new instance of {@link WorkerWebUIOperations}. + */ + public WorkerWebUIOperations() { + } + + /** + * Gets the operation count. + * @return the number of operations + */ + public long getOperationCount() { + return mOperationCount; + } + + /** + * Gets the current RPC queue length. + * @return the RPC queue length + */ + public long getRpcQueueLength() { + return mRpcQueueLength; + } + + /** + * Sets the operation count.
+ * @param operationCount the operation count + * @return the current obj + */ + public WorkerWebUIOperations setOperationCount(long operationCount) { + mOperationCount = operationCount; + return this; + } + + /** + * Sets the RPC queue length. + * @param rpcQueueLength queue length + * @return the current obj + */ + public WorkerWebUIOperations setRpcQueueLength(long rpcQueueLength) { + mRpcQueueLength = rpcQueueLength; + return this; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("operationCount", mOperationCount) + .add("rpcQueueLength", mRpcQueueLength) + .toString(); + } +} diff --git a/core/common/src/main/java/alluxio/wire/WorkerWebUIOverview.java b/core/common/src/main/java/alluxio/wire/WorkerWebUIOverview.java index 1dfbe17708c8..bb090e9021d8 100644 --- a/core/common/src/main/java/alluxio/wire/WorkerWebUIOverview.java +++ b/core/common/src/main/java/alluxio/wire/WorkerWebUIOverview.java @@ -34,6 +34,7 @@ public final class WorkerWebUIOverview implements Serializable { private String mUsedBytes; private String mBlockCount; private String mVersion; + private String mRevision; private UIWorkerInfo mWorkerInfo; /** @@ -96,6 +97,15 @@ public String getVersion() { return mVersion; } + /** + * Gets revision. + * + * @return the revision + */ + public String getRevision() { + return mRevision; + } + /** * Gets worker info. * @@ -109,7 +119,7 @@ public UIWorkerInfo getWorkerInfo() { * Sets capacity bytes. * * @param CapacityBytes the capacity bytes - * @return the capacity bytes + * @return the worker webui overview */ public WorkerWebUIOverview setCapacityBytes(String CapacityBytes) { mCapacityBytes = CapacityBytes; @@ -120,7 +130,7 @@ public WorkerWebUIOverview setCapacityBytes(String CapacityBytes) { * Sets storage dirs. * * @param StorageDirs the storage dirs - * @return the storage dirs + * @return the worker webui overview */ public WorkerWebUIOverview setStorageDirs(List StorageDirs) { mStorageDirs = StorageDirs; @@ -131,7 +141,7 @@ public WorkerWebUIOverview setStorageDirs(List StorageDirs) { * Sets usage on tiers. * * @param UsageOnTiers the usage on tiers - * @return the usage on tiers + * @return the worker webui overview */ public WorkerWebUIOverview setUsageOnTiers(List UsageOnTiers) { mUsageOnTiers = UsageOnTiers; @@ -142,7 +152,7 @@ public WorkerWebUIOverview setUsageOnTiers(List UsageOnTiers) { * Sets used bytes. * * @param UsedBytes the used bytes - * @return the used bytes + * @return the worker webui overview */ public WorkerWebUIOverview setUsedBytes(String UsedBytes) { mUsedBytes = UsedBytes; @@ -153,7 +163,7 @@ public WorkerWebUIOverview setUsedBytes(String UsedBytes) { * Sets worker block count. * * @param blockCount the block count on this worker - * @return unique block count + * @return the worker webui overview */ public WorkerWebUIOverview setBlockCount(String blockCount) { mBlockCount = blockCount; @@ -163,11 +173,22 @@ public WorkerWebUIOverview setBlockCount(String blockCount) { /** * Sets version. * - * @param Version the version - * @return the version + * @param version the version + * @return the worker webui overview + */ + public WorkerWebUIOverview setVersion(String version) { + mVersion = version; + return this; + } + + /** + * Sets revision. 
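The new operations page object is a plain fluent holder; filling it and relying on its MoreObjects-based toString looks like:

import alluxio.wire.WorkerWebUIOperations;

public class OperationsPageExample {
  public static void main(String[] args) {
    WorkerWebUIOperations ops = new WorkerWebUIOperations()
        .setOperationCount(12345L)
        .setRpcQueueLength(7L);
    // Prints WorkerWebUIOperations{operationCount=12345, rpcQueueLength=7}
    System.out.println(ops);
  }
}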
+ * + * @param revision the revision + * @return the worker webui overview */ - public WorkerWebUIOverview setVersion(String Version) { - mVersion = Version; + public WorkerWebUIOverview setRevision(String revision) { + mRevision = revision; return this; } @@ -175,7 +196,7 @@ public WorkerWebUIOverview setVersion(String Version) { * Sets worker info. * * @param WorkerInfo the worker info - * @return the worker info + * @return the worker webui overview */ public WorkerWebUIOverview setWorkerInfo(UIWorkerInfo WorkerInfo) { mWorkerInfo = WorkerInfo; @@ -186,7 +207,10 @@ public WorkerWebUIOverview setWorkerInfo(UIWorkerInfo WorkerInfo) { public String toString() { return MoreObjects.toStringHelper(this).add("capacityBytes", mCapacityBytes) .add("storageDirs", mStorageDirs).add("usageOnTiers", mUsageOnTiers) - .add("usedBytes", mUsedBytes).add("version", mVersion).add("workerInfo", mWorkerInfo) + .add("usedBytes", mUsedBytes) + .add("version", mVersion) + .add("revision", mRevision) + .add("workerInfo", mWorkerInfo) .add("blockCount", mBlockCount).toString(); } } diff --git a/core/common/src/main/java/alluxio/worker/block/BlockHeartbeatReport.java b/core/common/src/main/java/alluxio/worker/block/BlockHeartbeatReport.java index 5fb02209f85f..32419475151e 100644 --- a/core/common/src/main/java/alluxio/worker/block/BlockHeartbeatReport.java +++ b/core/common/src/main/java/alluxio/worker/block/BlockHeartbeatReport.java @@ -74,4 +74,15 @@ public List getRemovedBlocks() { public Map> getLostStorage() { return Collections.unmodifiableMap(mLostStorage); } + + /** + * @return the number of blocks in the report + */ + public int getBlockChangeCount() { + int count = mRemovedBlocks.size(); + for (List blocks: mAddedBlocks.values()) { + count += blocks.size(); + } + return count; + } } diff --git a/core/common/src/main/java/alluxio/worker/block/BlockStore.java b/core/common/src/main/java/alluxio/worker/block/BlockStore.java index c5329e717610..5e84e9a85946 100644 --- a/core/common/src/main/java/alluxio/worker/block/BlockStore.java +++ b/core/common/src/main/java/alluxio/worker/block/BlockStore.java @@ -97,7 +97,7 @@ BlockReader createBlockReader(long sessionId, long blockId, long offset, /** * Creates a block reader to read a UFS block starting from given block offset. - * Owner of this block reader must close it to cleanup state. + * Owner of this block reader must close it to clean up state. * * @param sessionId the client session ID * @param blockId the ID of the UFS block to read diff --git a/core/common/src/main/java/alluxio/worker/block/BlockStoreEventListener.java b/core/common/src/main/java/alluxio/worker/block/BlockStoreEventListener.java index d718c004e5a2..eca88c69191f 100644 --- a/core/common/src/main/java/alluxio/worker/block/BlockStoreEventListener.java +++ b/core/common/src/main/java/alluxio/worker/block/BlockStoreEventListener.java @@ -42,11 +42,18 @@ public interface BlockStoreEventListener { void onAbortBlock(long blockId); /** - * Actions when committing a temporary block to a {@link BlockStoreLocation}. + * Actions when committing a temporary block to a {@link BlockStoreLocation} at local block store. * @param blockId the id of the block to commit * @param location the location of the block to be committed */ - void onCommitBlock(long blockId, BlockStoreLocation location); + void onCommitBlockToLocal(long blockId, BlockStoreLocation location); + + /** + * Actions when a temporary block has been committed to the alluxio master. 
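Splitting the old onCommitBlock callback in two lets listeners distinguish a local seal from the master acknowledgment. A sketch of a listener using the split, assuming the existing no-op AbstractBlockStoreEventListener base class is available (otherwise implement BlockStoreEventListener directly):

import alluxio.worker.block.AbstractBlockStoreEventListener;
import alluxio.worker.block.BlockStoreLocation;

public class CommitLoggingListener extends AbstractBlockStoreEventListener {
  @Override
  public void onCommitBlockToLocal(long blockId, BlockStoreLocation location) {
    // Fired once the block is sealed in the local block store.
    System.out.printf("block %d committed locally at %s%n", blockId, location);
  }

  @Override
  public void onCommitBlockToMaster(long blockId, BlockStoreLocation location) {
    // Fired only after the Alluxio master has acknowledged the commit.
    System.out.printf("block %d acknowledged by master at %s%n", blockId, location);
  }
}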
+ /** + * Actions when a temporary block has been committed to the Alluxio master. + * @param blockId the id of the block to commit + * @param location the location of the block to be committed + */ + void onCommitBlockToMaster(long blockId, BlockStoreLocation location); /** * Actions when moving a block by a client from a {@link BlockStoreLocation} to another. diff --git a/core/common/src/main/java/alluxio/worker/block/BlockWorker.java b/core/common/src/main/java/alluxio/worker/block/BlockWorker.java index 7346691c68dc..8d65db1b0650 100644 --- a/core/common/src/main/java/alluxio/worker/block/BlockWorker.java +++ b/core/common/src/main/java/alluxio/worker/block/BlockWorker.java @@ -21,6 +21,7 @@ import alluxio.proto.dataserver.Protocol; import alluxio.wire.Configuration; import alluxio.wire.FileInfo; +import alluxio.wire.WorkerNetAddress; import alluxio.worker.SessionCleanable; import alluxio.worker.Worker; import alluxio.worker.block.io.BlockReader; @@ -237,4 +238,9 @@ BlockReader createUfsBlockReader(long sessionId, long blockId, long offset, bool * @return the block store */ BlockStore getBlockStore(); + + /** + * @return the worker address + */ + WorkerNetAddress getWorkerAddress(); } diff --git a/core/common/src/main/java/alluxio/worker/block/BlockWorkerMetrics.java b/core/common/src/main/java/alluxio/worker/block/BlockWorkerMetrics.java new file mode 100644 index 000000000000..e106c37b7e42 --- /dev/null +++ b/core/common/src/main/java/alluxio/worker/block/BlockWorkerMetrics.java @@ -0,0 +1,131 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +import alluxio.StorageTierAssoc; + +import java.util.HashMap; +import java.util.Map; + +/** + * BlockWorkerMetrics caches the metric data of the BlockMeta from the BlockWorker. + * + * This lets the BlockWorker pass the cached snapshot to registerGauge instead of letting + * registerGauge copy a whole BlockMeta every time the metrics are updated. + */ +public class BlockWorkerMetrics { + private final long mCapacityBytes; + private final long mUsedBytes; + private final long mCapacityFree; + + private final Map mCapacityBytesOnTiers; + private final Map mUsedBytesOnTiers; + private final Map mFreeBytesOnTiers; + private final int mNumberOfBlocks; +
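This snapshot class is populated by a `from()` factory shown further below, which recomputes free space per tier as capacity minus used, treating a tier missing from either map as zero bytes. A standalone sketch of that recalculation (tier names and sizes are made up, and for simplicity it iterates the capacity map's keys where the real code walks the tier aliases of a `StorageTierAssoc`):

```java
import java.util.HashMap;
import java.util.Map;

/** Sketch of the per-tier free-space recomputation done when building the metrics snapshot. */
public final class TierFreeBytesExample {
  public static void main(String[] args) {
    Map<String, Long> capacityBytesOnTiers = new HashMap<>();
    capacityBytesOnTiers.put("MEM", 1024L);
    capacityBytesOnTiers.put("SSD", 4096L);
    Map<String, Long> usedBytesOnTiers = new HashMap<>();
    usedBytesOnTiers.put("MEM", 256L);
    // No "SSD" entry: getOrDefault treats a missing tier as 0 bytes used.

    Map<String, Long> freeBytesOnTiers = new HashMap<>();
    for (String tier : capacityBytesOnTiers.keySet()) {
      freeBytesOnTiers.put(tier, capacityBytesOnTiers.getOrDefault(tier, 0L)
          - usedBytesOnTiers.getOrDefault(tier, 0L));
    }
    System.out.println(freeBytesOnTiers); // MEM -> 768, SSD -> 4096
  }
}
```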
+ /** + * Constructs a new BlockWorkerMetrics instance. + * @param capacityBytes the total capacity in bytes + * @param usedBytes the used bytes + * @param capacityFree the free capacity in bytes + * @param capacityBytesOnTiers the capacity bytes on each tier + * @param usedBytesOnTiers the used bytes on each tier + * @param freeBytesOnTiers the free bytes on each tier + * @param numberOfBlocks the number of blocks + */ + public BlockWorkerMetrics(long capacityBytes, long usedBytes, long capacityFree, + Map capacityBytesOnTiers, + Map usedBytesOnTiers, + Map freeBytesOnTiers, int numberOfBlocks) { + mCapacityBytes = capacityBytes; + mUsedBytes = usedBytes; + mCapacityFree = capacityFree; + mCapacityBytesOnTiers = capacityBytesOnTiers; + mUsedBytesOnTiers = usedBytesOnTiers; + mFreeBytesOnTiers = freeBytesOnTiers; + mNumberOfBlocks = numberOfBlocks; + } + + /** + * @return the capacityBytes + */ + public long getCapacityBytes() { + return mCapacityBytes; + } + + /** + * @return the usedBytes + */ + public long getUsedBytes() { + return mUsedBytes; + } + + /** + * @return the freeCapacityBytes + */ + public long getCapacityFree() { + return mCapacityFree; + } + + /** + * @return the tierCapacityBytes map + */ + public Map getCapacityBytesOnTiers() { + return mCapacityBytesOnTiers; + } + + /** + * @return the tierUsedBytes map + */ + public Map getUsedBytesOnTiers() { + return mUsedBytesOnTiers; + } + + /** + * @return the tierFreeBytes map + */ + public Map getFreeBytesOnTiers() { + return mFreeBytesOnTiers; + } + + /** + * @return the numberOfBlocks + */ + public int getNumberOfBlocks() { + return mNumberOfBlocks; + } + + /** + * Returns a new BlockWorkerMetrics built from a new BlockStoreMeta. + * @param meta new BlockStoreMeta + * @param s the StorageTierAssoc, passed as a parameter because it cannot be imported here + * @return a new BlockWorkerMetrics + */ + public static BlockWorkerMetrics from(BlockStoreMeta meta, StorageTierAssoc s) { + long capacityBytes = meta.getCapacityBytes(); + long usedBytes = meta.getUsedBytes(); + long capacityFree = capacityBytes - usedBytes; + Map capacityBytesOnTiers = meta.getCapacityBytesOnTiers(); + Map usedBytesOnTiers = meta.getUsedBytesOnTiers(); + // freeBytesOnTiers is recalculated + Map freeBytesOnTiers = new HashMap<>(); + for (int i = 0; i < s.size(); i++) { + String tier = s.getAlias(i); + freeBytesOnTiers.put(tier, capacityBytesOnTiers + .getOrDefault(tier, 0L) + - usedBytesOnTiers.getOrDefault(tier, 0L)); + } + int numberOfBlocks = meta.getNumberOfBlocks(); + return new BlockWorkerMetrics(capacityBytes, usedBytes, capacityFree, + capacityBytesOnTiers, usedBytesOnTiers, freeBytesOnTiers, numberOfBlocks); + } +} diff --git a/core/common/src/test/java/alluxio/conf/path/TrieNodeTest.java b/core/common/src/test/java/alluxio/conf/path/TrieNodeTest.java index fdb8a53eb49e..62c8d6690af7 100644 --- a/core/common/src/test/java/alluxio/conf/path/TrieNodeTest.java +++ b/core/common/src/test/java/alluxio/conf/path/TrieNodeTest.java @@ -11,6 +11,8 @@ package alluxio.conf.path; +import static org.junit.Assert.assertEquals; + import com.google.common.collect.ImmutableList; import com.google.common.collect.Streams; import org.junit.Assert; @@ -74,16 +76,16 @@ public void searchExact() { TrieNode d = node.insert("/c/d"); TrieNode g = node.insert("/c/g"); TrieNode h = node.insert("/u/h"); - Assert.assertEquals(a, node.searchExact("/a").get()); - Assert.assertEquals(b, node.searchExact("/a/b").get()); - Assert.assertEquals(f, node.searchExact("/a/e/f").get()); - Assert.assertEquals(d, node.searchExact("/c/d").get()); - Assert.assertEquals(g, node.searchExact("/c/g").get()); - Assert.assertEquals(h, node.searchExact("/u/h").get()); - Assert.assertEquals(Optional.empty(), node.searchExact("/")); -
Assert.assertEquals(Optional.empty(), node.searchExact("/ab")); - Assert.assertEquals(Optional.empty(), node.searchExact("/a/b/c")); - Assert.assertEquals(Optional.empty(), node.searchExact("/a/d")); + assertEquals(a, node.searchExact("/a").get()); + assertEquals(b, node.searchExact("/a/b").get()); + assertEquals(f, node.searchExact("/a/e/f").get()); + assertEquals(d, node.searchExact("/c/d").get()); + assertEquals(g, node.searchExact("/c/g").get()); + assertEquals(h, node.searchExact("/u/h").get()); + assertEquals(Optional.empty(), node.searchExact("/")); + assertEquals(Optional.empty(), node.searchExact("/ab")); + assertEquals(Optional.empty(), node.searchExact("/a/b/c")); + assertEquals(Optional.empty(), node.searchExact("/a/d")); } @Test @@ -97,17 +99,17 @@ public void deleteIfTrue() { TrieNode h = node.insert("/u/h"); Assert.assertTrue(node.search("/a/b").contains(b)); TrieNode b2 = node.deleteIf("/a/b", n -> { - Assert.assertEquals(b, n); + assertEquals(b, n); return true; }); - Assert.assertEquals(b, b2); + assertEquals(b, b2); Assert.assertFalse(node.search("/a/b").contains(b)); Assert.assertTrue(node.search("/a").contains(a)); TrieNode a2 = node.deleteIf("/a", n -> { - Assert.assertEquals(a, n); + assertEquals(a, n); return true; }); - Assert.assertEquals(a, a2); + assertEquals(a, a2); Assert.assertFalse(node.search("/a").contains(a)); Assert.assertTrue(node.search("/a/e/f").contains(f)); TrieNode c2 = node.deleteIf("/c", n -> true); @@ -115,10 +117,10 @@ public void deleteIfTrue() { Assert.assertTrue(node.search("/c/d").contains(d)); Assert.assertTrue(node.search("/c/g").contains(g)); TrieNode h2 = node.deleteIf("/u/h", n -> { - Assert.assertEquals(h, n); + assertEquals(h, n); return true; }); - Assert.assertEquals(h, h2); + assertEquals(h, h2); TrieNode nil = node.deleteIf("/n", n -> { Assert.fail(); return true; @@ -147,10 +149,10 @@ public void deleteAndInsert() { Assert.assertTrue(node.search("/a/b").contains(b)); TrieNode b2 = node.deleteIf("/a/b", n -> { - Assert.assertEquals(b, n); + assertEquals(b, n); return true; }); - Assert.assertEquals(b, b2); + assertEquals(b, b2); Assert.assertFalse(node.search("/a/b").contains(b)); TrieNode b3 = node.insert("/a/b"); Assert.assertTrue(node.search("/a/b").contains(b3)); @@ -158,10 +160,10 @@ public void deleteAndInsert() { Assert.assertTrue(node.search("/a").contains(a)); Assert.assertTrue(node.search("/a/b").contains(a)); TrieNode a2 = node.deleteIf("/a", n -> { - Assert.assertEquals(a, n); + assertEquals(a, n); return true; }); - Assert.assertEquals(a, a2); + assertEquals(a, a2); Assert.assertFalse(node.search("/a/b").contains(a)); Assert.assertFalse(node.search("/a").contains(a)); Assert.assertTrue(node.search("/a/b").contains(b3)); @@ -188,7 +190,7 @@ public void getChildren() { node.getLeafChildren("/a/e/f").toArray(TrieNode[]::new)); Assert.assertArrayEquals(new TrieNode[] {d}, node.getLeafChildren("/c/d").toArray(TrieNode[]::new)); - Assert.assertEquals(new HashSet(Arrays.asList(a, b, f, d, g, h)), + assertEquals(new HashSet(Arrays.asList(a, b, f, d, g, h)), node.getLeafChildren("/").collect(Collectors.toSet())); } @@ -205,8 +207,20 @@ public void clearTrie() { node.clear(); // after clearing, each node should only contain itself for (TrieNode nxt : ImmutableList.of(a, b, f, d, g, h)) { - Assert.assertEquals(Collections.singletonList(nxt), + assertEquals(Collections.singletonList(nxt), nxt.getLeafChildren("/").collect(Collectors.toList())); } } + + @Test + public void getLeafChildrenOnRoot() { + TrieNode node = new 
TrieNode<>(); + TrieNode a = node.insert("/a"); + TrieNode b = node.insert("/a/b"); + TrieNode f = node.insert("/a/e/f"); + TrieNode d = node.insert("/c/d"); + TrieNode g = node.insert("/c/g"); + TrieNode h = node.insert("/u/h"); + assertEquals(6, node.getLeafChildren("/").toArray().length); + } } diff --git a/core/common/src/test/java/alluxio/heartbeat/HeartbeatContextTest.java b/core/common/src/test/java/alluxio/heartbeat/HeartbeatContextTest.java index f5c222739dc0..0c972baf44db 100644 --- a/core/common/src/test/java/alluxio/heartbeat/HeartbeatContextTest.java +++ b/core/common/src/test/java/alluxio/heartbeat/HeartbeatContextTest.java @@ -21,7 +21,7 @@ */ public final class HeartbeatContextTest { @Test - public void allThreadsUseSleepingTimer() { + public void allThreadsUseProductionTimer() { for (String threadName : HeartbeatContext.getTimerClasses().keySet()) { Class timerClass = HeartbeatContext.getTimerClass(threadName); assertTrue(timerClass.isAssignableFrom(SleepingTimer.class)); diff --git a/core/common/src/test/java/alluxio/heartbeat/HeartbeatThreadTest.java b/core/common/src/test/java/alluxio/heartbeat/HeartbeatThreadTest.java index 5676fbe05d1c..921e250984da 100644 --- a/core/common/src/test/java/alluxio/heartbeat/HeartbeatThreadTest.java +++ b/core/common/src/test/java/alluxio/heartbeat/HeartbeatThreadTest.java @@ -139,8 +139,9 @@ public Void call() throws Exception { try (ManuallyScheduleHeartbeat.Resource r = new ManuallyScheduleHeartbeat.Resource(Arrays.asList(mThreadName))) { DummyHeartbeatExecutor executor = new DummyHeartbeatExecutor(); - HeartbeatThread ht = new HeartbeatThread(mThreadName, executor, 1, Configuration.global(), - UserState.Factory.create(Configuration.global())); + HeartbeatThread ht = new HeartbeatThread(mThreadName, executor, + () -> new FixedIntervalSupplier(1L), + Configuration.global(), UserState.Factory.create(Configuration.global())); // Run the HeartbeatThread. mExecutorService.submit(ht); @@ -166,7 +167,7 @@ private class DummyHeartbeatExecutor implements HeartbeatExecutor { private int mCounter = 0; @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { mCounter++; } diff --git a/core/common/src/test/java/alluxio/heartbeat/SleepingTimerForCronExpressionIntervalSupplierTest.java b/core/common/src/test/java/alluxio/heartbeat/SleepingTimerForCronExpressionIntervalSupplierTest.java new file mode 100644 index 000000000000..667eb05843eb --- /dev/null +++ b/core/common/src/test/java/alluxio/heartbeat/SleepingTimerForCronExpressionIntervalSupplierTest.java @@ -0,0 +1,123 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.heartbeat; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; + +import alluxio.Constants; +import alluxio.clock.ManualClock; +import alluxio.time.Sleeper; +import alluxio.time.SteppingThreadSleeper; + +import org.apache.logging.log4j.core.util.CronExpression; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; + +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.time.Duration; +import java.util.Date; + +/** + * Unit tests for {@link SleepingTimer}. + */ +public final class SleepingTimerForCronExpressionIntervalSupplierTest { + private static final String THREAD_NAME = "cron-test-thread-name"; + private static final long INTERVAL_MS = 10 * Constants.MINUTE_MS; + private Logger mMockLogger; + private ManualClock mFakeClock; + private Sleeper mMockSleeper; + private long mAllSleepTimeMs; + + @Before + public void before() throws InterruptedException { + mMockLogger = mock(Logger.class); + mFakeClock = new ManualClock(); + mMockSleeper = mock(Sleeper.class); + doAnswer((invocation) -> { + Duration duration = invocation.getArgument(0); + mFakeClock.addTime(duration); + mAllSleepTimeMs += duration.toMillis(); + return null; + }).when(mMockSleeper).sleep(any(Duration.class)); + } + + /** + * Tests that the cron timer will attempt to run at the same interval, independently of how + * long the execution between ticks takes. For example, if the interval is 100ms and execution + * takes 80ms, the timer should sleep for only 20ms to maintain the regular interval of 100ms. + */ + @Test + public void maintainInterval() throws Exception { + SleepingTimer timer = + new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, + new SteppingThreadSleeper(mMockSleeper, mFakeClock), + () -> { + try { + return new CronExpressionIntervalSupplier( + new CronExpression("* 30-59 0-1,4-9,13-23 * * ? 
*"), INTERVAL_MS); + } catch (ParseException e) { + throw new RuntimeException(e); + } + }); + DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + Date startDate = formatter.parse("2022-01-01 00:00:00"); + Assert.assertEquals(-1, timer.mPreviousTickedMs); + mFakeClock.setTimeMs(startDate.getTime()); + long limitMs = timer.tick(); + long lastAllSleepTimeMs = mAllSleepTimeMs; + Assert.assertEquals(30 * Constants.MINUTE_MS, mAllSleepTimeMs); + Assert.assertEquals(30 * Constants.MINUTE_MS, limitMs); + Assert.assertEquals(formatter.parse("2022-01-01 00:30:00"), new Date(timer.mPreviousTickedMs)); + Assert.assertEquals(formatter.parse("2022-01-01 00:30:00"), new Date(mFakeClock.millis())); + // Mock heartbeat 1 minute + mFakeClock.addTime(Duration.ofMinutes(1)); + + limitMs = timer.tick(); + Assert.assertEquals(9 * Constants.MINUTE_MS, mAllSleepTimeMs - lastAllSleepTimeMs); + lastAllSleepTimeMs = mAllSleepTimeMs; + Assert.assertEquals(20 * Constants.MINUTE_MS, limitMs); + Assert.assertEquals(formatter.parse("2022-01-01 00:40:00"), new Date(timer.mPreviousTickedMs)); + Assert.assertEquals(formatter.parse("2022-01-01 00:40:00"), new Date(mFakeClock.millis())); + // Mock heartbeat 5 minute + mFakeClock.addTime(Duration.ofMinutes(5)); + + limitMs = timer.tick(); + Assert.assertEquals(5 * Constants.MINUTE_MS, mAllSleepTimeMs - lastAllSleepTimeMs); + lastAllSleepTimeMs = mAllSleepTimeMs; + Assert.assertEquals(10 * Constants.MINUTE_MS, limitMs); + Assert.assertEquals(formatter.parse("2022-01-01 00:50:00"), new Date(timer.mPreviousTickedMs)); + Assert.assertEquals(formatter.parse("2022-01-01 00:50:00"), new Date(mFakeClock.millis())); + // Mock heartbeat 5 minute + mFakeClock.addTime(Duration.ofMinutes(5)); + + limitMs = timer.tick(); + Assert.assertEquals(35 * Constants.MINUTE_MS, mAllSleepTimeMs - lastAllSleepTimeMs); + lastAllSleepTimeMs = mAllSleepTimeMs; + Assert.assertEquals(30 * Constants.MINUTE_MS, limitMs); + Assert.assertEquals(formatter.parse("2022-01-01 01:30:00"), new Date(timer.mPreviousTickedMs)); + Assert.assertEquals(formatter.parse("2022-01-01 01:30:00"), new Date(mFakeClock.millis())); + // Mock heartbeat 30 minute + mFakeClock.addTime(Duration.ofMinutes(30)); + + limitMs = timer.tick(); + Assert.assertEquals(150 * Constants.MINUTE_MS, mAllSleepTimeMs - lastAllSleepTimeMs); + Assert.assertEquals(30 * Constants.MINUTE_MS, limitMs); + Assert.assertEquals(formatter.parse("2022-01-01 04:30:00"), new Date(timer.mPreviousTickedMs)); + Assert.assertEquals(formatter.parse("2022-01-01 04:30:00"), new Date(mFakeClock.millis())); + } +} diff --git a/core/common/src/test/java/alluxio/heartbeat/SleepingTimerTest.java b/core/common/src/test/java/alluxio/heartbeat/SleepingTimerTest.java index ae8ef03d8aea..fb79c749cbca 100644 --- a/core/common/src/test/java/alluxio/heartbeat/SleepingTimerTest.java +++ b/core/common/src/test/java/alluxio/heartbeat/SleepingTimerTest.java @@ -17,15 +17,21 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import alluxio.Constants; import alluxio.clock.ManualClock; +import alluxio.clock.SystemClock; import alluxio.time.Sleeper; +import alluxio.time.SteppingThreadSleeper; +import alluxio.time.ThreadSleeper; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; import org.slf4j.Logger; import java.time.Duration; +import java.util.concurrent.atomic.AtomicLong; /** * Unit tests for {@link SleepingTimer}. 
@@ -47,7 +53,9 @@ public void before() { @Test public void warnWhenExecutionTakesLongerThanInterval() throws Exception { SleepingTimer timer = - new SleepingTimer(THREAD_NAME, INTERVAL_MS, mMockLogger, mFakeClock, mMockSleeper); + new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, + new SteppingThreadSleeper(mMockSleeper, mFakeClock), + () -> new FixedIntervalSupplier(INTERVAL_MS, mMockLogger)); timer.tick(); mFakeClock.addTimeMs(5 * INTERVAL_MS); @@ -60,7 +68,9 @@ public void warnWhenExecutionTakesLongerThanInterval() throws Exception { @Test public void sleepForSpecifiedInterval() throws Exception { final SleepingTimer timer = - new SleepingTimer(THREAD_NAME, INTERVAL_MS, mMockLogger, mFakeClock, mMockSleeper); + new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, + new SteppingThreadSleeper(mMockSleeper, mFakeClock), + () -> new FixedIntervalSupplier(INTERVAL_MS)); timer.tick(); // first tick won't sleep verify(mMockSleeper, times(0)).sleep(any(Duration.class)); timer.tick(); @@ -75,11 +85,45 @@ public void sleepForSpecifiedInterval() throws Exception { @Test public void maintainInterval() throws Exception { SleepingTimer stimer = - new SleepingTimer(THREAD_NAME, INTERVAL_MS, mMockLogger, mFakeClock, mMockSleeper); + new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, + new SteppingThreadSleeper(mMockSleeper, mFakeClock), + () -> new FixedIntervalSupplier(INTERVAL_MS)); stimer.tick(); mFakeClock.addTimeMs(INTERVAL_MS / 3); stimer.tick(); verify(mMockSleeper).sleep(Duration.ofMillis(INTERVAL_MS - (INTERVAL_MS / 3))); } + + @Test + public void updateIntervalForSteppingTimer() throws Exception { + AtomicLong interval = new AtomicLong(10 * Constants.SECOND_MS); + AtomicLong tickCount = new AtomicLong(0L); + SteppingThreadSleeper sts = + new SteppingThreadSleeper(ThreadSleeper.INSTANCE, SystemClock.systemUTC()); + sts.setSleepStepMs(Constants.SECOND_MS); + SleepingTimer stimer = + new SleepingTimer(THREAD_NAME, mMockLogger, SystemClock.systemUTC(), sts, + () -> new FixedIntervalSupplier(interval.get())); + new Thread(() -> { + while (true) { + try { + stimer.tick(); + tickCount.incrementAndGet(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + }).start(); + long oldInterval = interval.get(); + Thread.sleep(oldInterval / 2); + long tickCountInit = tickCount.get(); + // scale in the interval + interval.set(oldInterval / 5); + stimer.update(); + Thread.sleep(oldInterval); + long newTickCount = tickCount.get(); + Assert.assertTrue("current tickCount = " + + newTickCount + " is not >= 5 + " + tickCountInit, newTickCount >= tickCountInit + 5); + } } diff --git a/core/common/src/test/java/alluxio/master/PollingMasterInquireClientTest.java b/core/common/src/test/java/alluxio/master/PollingMasterInquireClientTest.java index a5b1c088d3ca..27f9e9be568b 100644 --- a/core/common/src/test/java/alluxio/master/PollingMasterInquireClientTest.java +++ b/core/common/src/test/java/alluxio/master/PollingMasterInquireClientTest.java @@ -11,12 +11,20 @@ package alluxio.master; -import static org.junit.Assert.fail; +import static org.junit.Assert.assertThrows; import alluxio.Constants; +import alluxio.conf.AlluxioProperties; import alluxio.conf.ConfigurationBuilder; +import alluxio.conf.InstancedConfiguration; +import alluxio.conf.PropertyKey; import alluxio.exception.status.UnavailableException; +import alluxio.grpc.GrpcServer; +import alluxio.grpc.GrpcServerAddress; +import alluxio.grpc.GrpcServerBuilder; +import alluxio.grpc.GrpcService; import 
alluxio.grpc.ServiceType; +import alluxio.grpc.ServiceVersionClientServiceGrpc; import alluxio.network.RejectingServer; import alluxio.retry.CountingRetry; import alluxio.util.network.NetworkAddressUtils; @@ -46,11 +54,41 @@ public void pollRejectingDoesntHang() throws Exception { PollingMasterInquireClient client = new PollingMasterInquireClient(addrs, () -> new CountingRetry(0), new ConfigurationBuilder().build(), ServiceType.META_MASTER_CLIENT_SERVICE); + assertThrows("Expected polling to fail", UnavailableException.class, + client::getPrimaryRpcAddress); + } + + @Test(timeout = 10000) + public void concurrentPollingMaster() throws Exception { + int port1 = PortRegistry.reservePort(); + int port2 = PortRegistry.reservePort(); + InetSocketAddress serverAddress1 = new InetSocketAddress("127.0.0.1", port1); + InetSocketAddress serverAddress2 = new InetSocketAddress("127.0.0.1", port2); + RejectingServer s1 = new RejectingServer(serverAddress1, 20000); + GrpcServer s2 = + GrpcServerBuilder.forAddress(GrpcServerAddress.create(serverAddress2), + new InstancedConfiguration(new AlluxioProperties())) + .addService(ServiceType.META_MASTER_CLIENT_SERVICE, new GrpcService( + new ServiceVersionClientServiceGrpc.ServiceVersionClientServiceImplBase() { + })).build(); try { + s1.start(); + s2.start(); + List addrs = + Arrays.asList(InetSocketAddress.createUnresolved("127.0.0.1", port1), + InetSocketAddress.createUnresolved("127.0.0.1", port2)); + PollingMasterInquireClient client = new PollingMasterInquireClient(addrs, + () -> new CountingRetry(0), + new ConfigurationBuilder() + .setProperty(PropertyKey.USER_MASTER_POLLING_CONCURRENT, true) + .build(), + ServiceType.META_MASTER_CLIENT_SERVICE); client.getPrimaryRpcAddress(); - fail("Expected polling to fail"); - } catch (UnavailableException e) { - // Expected + } finally { + s1.stopAndJoin(); + s2.shutdown(); + PortRegistry.release(port1); + PortRegistry.release(port2); } } } diff --git a/core/common/src/test/java/alluxio/network/protocol/databuffer/RefCountedNioByteBufTest.java b/core/common/src/test/java/alluxio/network/protocol/databuffer/RefCountedNioByteBufTest.java index bfa2d76d6264..955c9d9b7a73 100644 --- a/core/common/src/test/java/alluxio/network/protocol/databuffer/RefCountedNioByteBufTest.java +++ b/core/common/src/test/java/alluxio/network/protocol/databuffer/RefCountedNioByteBufTest.java @@ -12,10 +12,13 @@ package alluxio.network.protocol.databuffer; import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; import alluxio.Constants; +import alluxio.util.io.BufferUtils; import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Suite; @@ -55,6 +58,18 @@ public void invalidCapacity() { assertThrows(IllegalArgumentException.class, () -> buf.capacity(10)); } + + @Test + public void setBytesWithAnotherByteBuf() { + ByteBuf srcBuf = Unpooled.directBuffer(100); + srcBuf.setBytes(0, BufferUtils.getIncreasingByteArray(100)); + ByteBuf dstBuf = new LeakyByteBuf(ByteBuffer.allocateDirect(100), 100, 100); + final int offset = 42; + final int length = 17; + dstBuf.setBytes(0, srcBuf, offset, length); + assertTrue(BufferUtils.equalIncreasingByteBuffer( + offset, length, dstBuf.slice(0, length).nioBuffer())); + } } private static class LeakyByteBuf extends RefCountedNioByteBuf { diff --git a/core/common/src/test/java/alluxio/resource/DynamicResourcePoolTest.java 
b/core/common/src/test/java/alluxio/resource/DynamicResourcePoolTest.java index ff1677aabd3c..3b06c2f217b9 100644 --- a/core/common/src/test/java/alluxio/resource/DynamicResourcePoolTest.java +++ b/core/common/src/test/java/alluxio/resource/DynamicResourcePoolTest.java @@ -166,7 +166,8 @@ public void acquireWithCapacity() throws Exception { */ @Test public void acquireFIFO() throws Exception { - TestPool pool = new TestPool(DynamicResourcePool.Options.defaultOptions().setFIFO(true)); + TestPool pool = new TestPool(DynamicResourcePool.Options.defaultOptions().setSelectionPolicy( + DynamicResourcePool.SelectionPolicy.FIFO)); List resourceList = new ArrayList<>(); for (int i = 0; i < 3; i++) { Resource resource = pool.acquire(); diff --git a/core/common/src/test/java/alluxio/underfs/FingerprintTest.java b/core/common/src/test/java/alluxio/underfs/FingerprintTest.java index 0e1e6fa4ad74..9f0ddee1efb6 100644 --- a/core/common/src/test/java/alluxio/underfs/FingerprintTest.java +++ b/core/common/src/test/java/alluxio/underfs/FingerprintTest.java @@ -119,6 +119,12 @@ public void createFingerprintFromUfsStatus() { assertEquals(owner, fp.getTag(Fingerprint.Tag.OWNER)); assertEquals(group, fp.getTag(Fingerprint.Tag.GROUP)); assertEquals(String.valueOf(mode), fp.getTag(Fingerprint.Tag.MODE)); + assertEquals(contentHash, fp.getTag(Fingerprint.Tag.CONTENT_HASH)); + + // create a fingerprint with a custom content hash + String contentHash2 = CommonUtils.randomAlphaNumString(10); + fp = Fingerprint.create(ufsName, fileStatus, contentHash2); + assertEquals(contentHash2, fp.getTag(Fingerprint.Tag.CONTENT_HASH)); } @Test @@ -131,11 +137,21 @@ public void createACLFingeprint() { CommonUtils.randomAlphaNumString(10), CommonUtils.randomAlphaNumString(10), Arrays.asList("user::rw-", "group::r--", "other::rwx")); - Fingerprint fp = Fingerprint.create(CommonUtils.randomAlphaNumString(10), status, acl); + Fingerprint fp = Fingerprint.create(CommonUtils.randomAlphaNumString(10), status, null, acl); String expected = fp.serialize(); assertNotNull(expected); assertEquals("user::rw-,group::r--,other::rwx", Fingerprint.parse(expected).getTag(Fingerprint.Tag.ACL)); assertEquals(expected, Fingerprint.parse(expected).serialize()); } + + @Test + public void sanitizeString() { + Fingerprint dummy = Fingerprint.INVALID_FINGERPRINT; + assertEquals("foobar", dummy.sanitizeString("foobar")); + assertEquals("foo_bar", dummy.sanitizeString("foo bar")); + assertEquals("foo_bar", dummy.sanitizeString("foo|bar")); + assertEquals("foo_bar_baz", dummy.sanitizeString("foo bar|baz")); + assertEquals("foo_bar_baz_qux", dummy.sanitizeString("foo bar baz qux")); + } } diff --git a/core/common/src/test/java/alluxio/underfs/ObjectUnderFileSystemTest.java b/core/common/src/test/java/alluxio/underfs/ObjectUnderFileSystemTest.java index b1351a0b58db..eb0c1e6bf23d 100644 --- a/core/common/src/test/java/alluxio/underfs/ObjectUnderFileSystemTest.java +++ b/core/common/src/test/java/alluxio/underfs/ObjectUnderFileSystemTest.java @@ -11,23 +11,37 @@ package alluxio.underfs; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import alluxio.AlluxioURI; +import alluxio.ConfigurationRule; import alluxio.conf.AlluxioConfiguration; import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.file.options.DescendantType; +import alluxio.underfs.options.ListOptions; +import com.google.common.collect.ImmutableMap; +import org.junit.Assert; +import 
org.junit.Rule; import org.junit.Test; import org.mockito.Mockito; import java.io.FileNotFoundException; import java.io.IOException; import java.net.SocketException; +import java.util.stream.Collectors; public class ObjectUnderFileSystemTest { private static final AlluxioConfiguration CONF = Configuration.global(); + @Rule + public ConfigurationRule mConfigurationRule = new ConfigurationRule(ImmutableMap.of( + PropertyKey.UNDERFS_EVENTUAL_CONSISTENCY_RETRY_MAX_NUM, 20), + Configuration.modifiableGlobal()); + private ObjectUnderFileSystem mObjectUFS = new MockObjectUnderFileSystem(new AlluxioURI("/"), UnderFileSystemConfiguration.defaults(CONF)); @@ -60,4 +74,47 @@ public void testRetryOnException() { fail(); } } + + @Test + public void testListObjectStorageDescendantTypeNone() throws Throwable { + mObjectUFS = new MockObjectUnderFileSystem(new AlluxioURI("/"), + UnderFileSystemConfiguration.defaults(CONF)) { + final UfsStatus mF1Status = new UfsFileStatus("f1", "", 0L, 0L, "", "", (short) 0777, 0L); + final UfsStatus mF2Status = new UfsFileStatus("f2", "", 1L, 0L, "", "", (short) 0777, 0L); + + @Override + public UfsStatus getStatus(String path) throws IOException { + if (path.equals("root/f1")) { + return mF1Status; + } else if (path.equals("root/f2")) { + return mF2Status; + } + throw new FileNotFoundException(); + } + + @Override + public UfsStatus[] listStatus(String path) throws IOException { + if (path.equals("root") || path.equals("root/")) { + return new UfsStatus[] {mF1Status, mF2Status}; + } + return new UfsStatus[0]; + } + + @Override + public UfsStatus[] listStatus(String path, ListOptions options) throws IOException { + return listStatus(path); + } + + @Override + protected ObjectPermissions getPermissions() { + return new ObjectPermissions("foo", "bar", (short) 0777); + } + }; + + UfsLoadResult result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mObjectUFS, "root", DescendantType.NONE); + Assert.assertEquals(1, result.getItemsCount()); + UfsStatus status = result.getItems().collect(Collectors.toList()).get(0); + assertEquals("root", status.getName()); + } } diff --git a/core/common/src/test/java/alluxio/underfs/UnderFileSystemTestUtil.java b/core/common/src/test/java/alluxio/underfs/UnderFileSystemTestUtil.java new file mode 100644 index 000000000000..5b52f6cdd9d2 --- /dev/null +++ b/core/common/src/test/java/alluxio/underfs/UnderFileSystemTestUtil.java @@ -0,0 +1,49 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs; + +import alluxio.file.options.DescendantType; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Test utils for UFS. + */ +public class UnderFileSystemTestUtil { + /** + * A test helper convert the async performListingAsync call to a sync one. 
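The test helper being introduced here bridges a callback-style listing API to a blocking call using a `CountDownLatch` plus two `AtomicReference`s, one for the result and one for the error. A generic standalone sketch of the same pattern (the `listAsync` API below is a toy stand-in, not the UFS interface):

```java
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;

/** Sketch of the latch pattern used to make an async callback API synchronous in tests. */
public final class AsyncToSyncExample {
  /** A toy async API: invokes exactly one of the two callbacks on another thread. */
  static void listAsync(ExecutorService es, Consumer<String> onResult,
      Consumer<Throwable> onError) {
    es.submit(() -> onResult.accept("listing-result"));
  }

  static String listSync(ExecutorService es) throws Throwable {
    CountDownLatch latch = new CountDownLatch(1);
    AtomicReference<String> result = new AtomicReference<>();
    AtomicReference<Throwable> error = new AtomicReference<>();
    listAsync(es,
        r -> { result.set(r); latch.countDown(); },
        t -> { error.set(t); latch.countDown(); });
    latch.await(); // block until one of the callbacks fires
    if (error.get() != null) {
      throw error.get(); // surface the async failure to the test
    }
    return result.get();
  }

  public static void main(String[] args) throws Throwable {
    ExecutorService es = Executors.newSingleThreadExecutor();
    System.out.println(listSync(es));
    es.shutdown();
  }
}
```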
+ * @param ufs the ufs object + * @param path the path + * @param descendantType the descendant type + * @return the ufs load result + */ + public static UfsLoadResult performListingAsyncAndGetResult( + UnderFileSystem ufs, String path, DescendantType descendantType) throws Throwable { + CountDownLatch latch = new CountDownLatch(1); + AtomicReference throwable = new AtomicReference<>(); + AtomicReference result = new AtomicReference<>(); + ufs.performListingAsync(path, null, null, descendantType, descendantType == DescendantType.NONE, + (r) -> { + result.set(r); + latch.countDown(); + }, (t) -> { + throwable.set(t); + latch.countDown(); + }); + latch.await(); + if (throwable.get() != null) { + throw throwable.get(); + } + return result.get(); + } +} diff --git a/core/common/src/test/java/alluxio/util/RateLimiterTest.java b/core/common/src/test/java/alluxio/util/RateLimiterTest.java new file mode 100644 index 000000000000..f7d4e05a616a --- /dev/null +++ b/core/common/src/test/java/alluxio/util/RateLimiterTest.java @@ -0,0 +1,88 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import com.google.common.base.Ticker; +import org.junit.Before; +import org.junit.Test; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +public class RateLimiterTest { + + private final Ticker mTicker = new Ticker() { + @Override + public long read() { + return mTime; + } + }; + + private long mTime; + + @Before + public void before() { + mTime = 0; + } + + @Test + public void testFastRequests() { + long permitsPerSecond = 10; + long timePerPermit = Duration.ofSeconds(1).toNanos() / permitsPerSecond; + SimpleRateLimiter rateLimiter = new SimpleRateLimiter(permitsPerSecond, mTicker); + + // if the timer is moving as fast as the permits then there should be no waiting + for (int i = 0; i < 10; i++) { + mTime += timePerPermit; + assertFalse(rateLimiter.acquire().isPresent()); + } + // if we move forward a large amount, we should still only get 1 new permit + mTime += timePerPermit * 100; + assertFalse(rateLimiter.acquire().isPresent()); + assertTrue(rateLimiter.acquire().isPresent()); + + mTime += timePerPermit; + assertTrue(rateLimiter.acquire().isPresent()); + + mTime += timePerPermit * 2; + assertFalse(rateLimiter.acquire().isPresent()); + + Optional permit = rateLimiter.acquire(); + assertTrue(permit.isPresent()); + mTime += timePerPermit; + assertEquals(mTime, (long) permit.get()); + } + + @Test + public void testSlowRequests() { + long permitsPerSecond = 10; + long timePerPermit = Duration.ofSeconds(1).toNanos() / permitsPerSecond; + SimpleRateLimiter rateLimiter = new SimpleRateLimiter(permitsPerSecond, mTicker); + List permits = new ArrayList<>(); + for (int i = 0; i < permitsPerSecond; i++) { + Optional permit = rateLimiter.acquire(); + assertTrue(permit.isPresent()); + permits.add(permit.get()); + } + 
assertEquals(Duration.ofSeconds(1).toNanos(), (long) permits.get(permits.size() - 1)); + for (int i = 0; i < permitsPerSecond; i++) { + mTime += timePerPermit; + assertEquals(0, rateLimiter.getWaitTimeNanos(permits.get(i))); + } + } +} diff --git a/core/common/src/test/java/alluxio/util/io/PathUtilsTest.java b/core/common/src/test/java/alluxio/util/io/PathUtilsTest.java index cd34c8a078b9..81d7e1ac01da 100644 --- a/core/common/src/test/java/alluxio/util/io/PathUtilsTest.java +++ b/core/common/src/test/java/alluxio/util/io/PathUtilsTest.java @@ -507,4 +507,34 @@ public void normalizePath() throws Exception { assertEquals("/foo/bar//", PathUtils.normalizePath("/foo/bar//", "/")); assertEquals("/foo/bar%", PathUtils.normalizePath("/foo/bar", "%")); } + + /** + * Tests the {@link PathUtils#getPossibleMountPoints(String)} method to + * throw an exception in case the path is invalid. + */ + @Test + public void getPossibleMountPointsException() throws InvalidPathException { + mException.expect(InvalidPathException.class); + PathUtils.getPossibleMountPoints(""); + } + + /** + * Tests the {@link PathUtils#getPossibleMountPoints(String)} method. + */ + @Test + public void getPossibleMountPointsNoException() throws InvalidPathException { + ArrayList paths = new ArrayList<>(); + assertEquals(paths, PathUtils.getPossibleMountPoints("/")); + assertEquals(paths, PathUtils.getPossibleMountPoints("//")); + + paths.add("/a"); + assertEquals(paths, PathUtils.getPossibleMountPoints("/a")); + assertEquals(paths, PathUtils.getPossibleMountPoints("/a/")); + paths.add("/a/b"); + assertEquals(paths, PathUtils.getPossibleMountPoints("/a/b")); + assertEquals(paths, PathUtils.getPossibleMountPoints("/a/b/")); + paths.add("/a/b/c"); + assertEquals(paths, PathUtils.getPossibleMountPoints("/a/b/c")); + assertEquals(paths, PathUtils.getPossibleMountPoints("/a/b/c/")); + } } diff --git a/core/common/src/test/java/alluxio/util/proto/BlockLocationUtilsTest.java b/core/common/src/test/java/alluxio/util/proto/BlockLocationUtilsTest.java new file mode 100644 index 000000000000..2655aee3e36d --- /dev/null +++ b/core/common/src/test/java/alluxio/util/proto/BlockLocationUtilsTest.java @@ -0,0 +1,49 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
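The rate limiter exercised by the tests above hands back either nothing (the permit is immediately valid) or the future time at which the permit becomes valid, with time injected through a ticker so tests are deterministic. A minimal standalone sketch of one way such a next-permit-time scheduler can work (`TinyRateLimiter` is illustrative, not Alluxio's `SimpleRateLimiter`; the one-permit-burst behavior in particular is only modeled approximately):

```java
import java.util.Optional;
import java.util.function.LongSupplier;

/** Sketch of a next-permit-time rate limiter driven by an injectable nanosecond clock. */
public final class TinyRateLimiter {
  private final long mNanosPerPermit;
  private final LongSupplier mClock; // injectable so tests can advance time manually
  private long mNextPermitNanos;

  TinyRateLimiter(long permitsPerSecond, LongSupplier clock) {
    mNanosPerPermit = 1_000_000_000L / permitsPerSecond;
    mClock = clock;
    mNextPermitNanos = clock.getAsLong() + mNanosPerPermit;
  }

  /** @return empty if the permit is immediately available, else the time it becomes valid */
  Optional<Long> acquire() {
    long now = mClock.getAsLong();
    if (now >= mNextPermitNanos) {
      // Do not accumulate a burst: the next permit is scheduled relative to now.
      mNextPermitNanos = now + mNanosPerPermit;
      return Optional.empty();
    }
    long scheduled = mNextPermitNanos;
    mNextPermitNanos += mNanosPerPermit;
    return Optional.of(scheduled);
  }

  public static void main(String[] args) {
    long[] fakeTime = {0L}; // manual ticker, like the tests above
    TinyRateLimiter limiter = new TinyRateLimiter(10, () -> fakeTime[0]);
    fakeTime[0] += 100_000_000L;           // advance exactly one permit interval
    System.out.println(limiter.acquire()); // Optional.empty: no wait needed
    System.out.println(limiter.acquire()); // Optional[200000000]: caller must wait
  }
}
```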
+ */ + +package alluxio.util.proto; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertSame; + +import alluxio.proto.meta.Block.BlockLocation; + +import org.junit.Test; + +public class BlockLocationUtilsTest { + @Test + public void testBlockLocationCached() { + BlockLocation location1 = BlockLocationUtils.getCached(1, "HDD", "SSD"); + assertEquals("HDD", location1.getTier()); + assertEquals("SSD", location1.getMediumType()); + assertEquals(1, location1.getWorkerId()); + + BlockLocation location2 = BlockLocationUtils.getCached(1, "HDD", "SSD"); + assertSame(location1, location2); + assertEquals(location1, location2); + + BlockLocation location3 = BlockLocationUtils.getCached(location2); + assertSame(location1, location3); + assertEquals(location1, location3); + + BlockLocationUtils.evictByWorkerId(1); + + BlockLocation location4 = BlockLocationUtils.getCached(1, "HDD", "SSD"); + assertNotSame(location1, location4); + assertEquals(location1, location4); + } + + @Test(expected = IllegalStateException.class) + public void testInvalidValue() { + BlockLocationUtils.getCached(1, "INVALID", "SSD"); + } +} diff --git a/core/common/src/test/java/alluxio/wire/MasterInfoTest.java b/core/common/src/test/java/alluxio/wire/MasterInfoTest.java index 51b41f6cc1ca..8c47a594070b 100644 --- a/core/common/src/test/java/alluxio/wire/MasterInfoTest.java +++ b/core/common/src/test/java/alluxio/wire/MasterInfoTest.java @@ -11,7 +11,10 @@ package alluxio.wire; +import alluxio.util.CommonUtils; + import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.RandomStringUtils; import org.junit.Assert; import org.junit.Test; @@ -32,7 +35,14 @@ public void json() throws Exception { public void checkEquality(MasterInfo a, MasterInfo b) { Assert.assertEquals(a.getId(), b.getId()); Assert.assertEquals(a.getAddress(), b.getAddress()); - Assert.assertEquals(a.getLastUpdatedTimeMs(), b.getLastUpdatedTimeMs()); + Assert.assertEquals(a.getLastUpdatedTime(), b.getLastUpdatedTime()); + Assert.assertEquals(a.getStartTime(), b.getStartTime()); + Assert.assertEquals(a.getGainPrimacyTime(), b.getGainPrimacyTime()); + Assert.assertEquals(a.getLosePrimacyTime(), b.getLosePrimacyTime()); + Assert.assertEquals(a.getLastCheckpointTime(), b.getLastCheckpointTime()); + Assert.assertEquals(a.getJournalEntriesSinceCheckpoint(), b.getJournalEntriesSinceCheckpoint()); + Assert.assertEquals(a.getVersion(), b.getVersion()); + Assert.assertEquals(a.getRevision(), b.getRevision()); Assert.assertEquals(a, b); } @@ -40,9 +50,25 @@ public static MasterInfo createRandom() { Random random = new Random(); long id = random.nextLong(); Address address = new Address(RandomStringUtils.randomAlphanumeric(10), random.nextInt()); + long lastUpdatedTimeMs = CommonUtils.getCurrentMs(); + long gainPrimacyTimeMs = lastUpdatedTimeMs - random.nextInt(10000); + long losePrimacyTimeMs = lastUpdatedTimeMs - random.nextInt(10000); + long startTimeMs = gainPrimacyTimeMs - random.nextInt(10000); + String version = String.format("%d.%d.%d", random.nextInt(10), + random.nextInt(20), random.nextInt(10)); + String revision = DigestUtils.sha1Hex(RandomStringUtils.random(10)); + long lastCheckpointTime = startTimeMs + (lastUpdatedTimeMs - startTimeMs) / 2; + long journalEntriesSinceCheckpoint = random.nextInt(1000); MasterInfo result = new MasterInfo(id, address); - result.updateLastUpdatedTimeMs(); + 
result.setLastUpdatedTimeMs(lastUpdatedTimeMs); + result.setStartTimeMs(startTimeMs); + result.setGainPrimacyTimeMs(gainPrimacyTimeMs); + result.setLosePrimacyTimeMs(losePrimacyTimeMs); + result.setLastCheckpointTimeMs(lastCheckpointTime); + result.setJournalEntriesSinceCheckpoint(journalEntriesSinceCheckpoint); + result.setVersion(version); + result.setRevision(revision); return result; } } diff --git a/core/common/src/test/java/alluxio/wire/WorkerInfoTest.java b/core/common/src/test/java/alluxio/wire/WorkerInfoTest.java index 00cf71e5390a..88d2a00d927e 100644 --- a/core/common/src/test/java/alluxio/wire/WorkerInfoTest.java +++ b/core/common/src/test/java/alluxio/wire/WorkerInfoTest.java @@ -92,6 +92,7 @@ public static WorkerInfo createRandom() { String version = String.format("%d.%d.%d", random.nextInt(10), random.nextInt(20), random.nextInt(10)); String revision = DigestUtils.sha1Hex(RandomStringUtils.random(10)); + int numVCpu = random.nextInt(128); result.setId(id); result.setAddress(address); @@ -104,6 +105,7 @@ public static WorkerInfo createRandom() { result.setUsedBytesOnTiers(usedBytesOnTiers); result.setVersion(version); result.setRevision(revision); + result.setNumVCpu(numVCpu); return result; } } diff --git a/core/server/common/pom.xml b/core/server/common/pom.xml index 457fb9013a5e..ec5b82504477 100644 --- a/core/server/common/pom.xml +++ b/core/server/common/pom.xml @@ -26,7 +26,7 @@ ${project.parent.parent.parent.basedir}/build - 2.4.0 + 2.4.1 @@ -135,6 +135,11 @@ + + org.lz4 + lz4-java + 1.8.0 + diff --git a/core/server/common/src/main/java/alluxio/ProcessUtils.java b/core/server/common/src/main/java/alluxio/ProcessUtils.java index 802d6d77a986..4e9d13c464bb 100644 --- a/core/server/common/src/main/java/alluxio/ProcessUtils.java +++ b/core/server/common/src/main/java/alluxio/ProcessUtils.java @@ -11,19 +11,51 @@ package alluxio; +import static alluxio.metrics.sink.MetricsServlet.OBJECT_MAPPER; + import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.metrics.MetricsSystem; +import alluxio.util.CommonUtils; +import alluxio.util.ThreadUtils; import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.time.format.FormatStyle; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; + /** * Utility methods for Alluxio {@link Process}es. */ public final class ProcessUtils { private static final Logger LOG = LoggerFactory.getLogger(ProcessUtils.class); + public static final Set COLLECT_ON_EXIT = + ImmutableSet.of(CommonUtils.ProcessType.MASTER, CommonUtils.ProcessType.WORKER); + public static volatile boolean sInfoDumpOnExitCheck = false; + public static final DateTimeFormatter DATETIME_FORMAT = + DateTimeFormatter.ofLocalizedDateTime(FormatStyle.SHORT).ofPattern("yyyyMMdd-HHmmss") + .withLocale(Locale.getDefault()).withZone(ZoneId.systemDefault()); + /** * Runs the given {@link Process}. This method should only be called from {@code main()} methods. 
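The volatile `sInfoDumpOnExitCheck` flag declared in the ProcessUtils changes above is checked and flipped under a class-wide lock in `dumpInformationOnExit()` below, so racing exit paths (normal stop, fatal error, shutdown hook) dump at most once. A standalone sketch of that dump-once guard (the class and method names are simplifications):

```java
/** Sketch of the dump-once guard used on process exit. */
public final class DumpOnceExample {
  private static volatile boolean sDumped = false;

  static void dumpInformation() {
    // Several exit paths can race here, so the flag is checked
    // and flipped while holding a class-wide lock.
    synchronized (DumpOnceExample.class) {
      if (sDumped) {
        return;
      }
      sDumped = true;
    }
    System.out.println("dumping metrics and thread stacks exactly once");
  }

  public static void main(String[] args) {
    dumpInformation();
    dumpInformation(); // second call is a no-op
  }
}
```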
* @@ -36,6 +68,9 @@ public static void run(Process process) { LOG.info("Java version: {}", System.getProperty("java.version")); process.start(); LOG.info("Stopping {}.", process); + + dumpInformationOnExit(); + System.exit(0); } catch (Throwable t) { LOG.error("Uncaught exception while running {}, stopping it and exiting. " @@ -48,6 +83,8 @@ public static void run(Process process) { + "Exception \"{}\", Root Cause \"{}\"", process, t2, Throwables.getRootCause(t2), t2); } + dumpInformationOnExit(); + System.exit(-1); } } @@ -80,6 +117,9 @@ public static void fatalError(Logger logger, Throwable t, String format, Object. throw new RuntimeException(message); } logger.error(message); + + dumpInformationOnExit(); + System.exit(-1); } @@ -95,6 +135,7 @@ public static void fatalError(Logger logger, Throwable t, String format, Object. public static void stopProcessOnShutdown(final Process process) { Runtime.getRuntime().addShutdownHook(new Thread(() -> { try { + dumpInformationOnExit(); process.stop(); } catch (Throwable t) { LOG.error("Failed to stop process", t); @@ -102,5 +143,110 @@ public static void stopProcessOnShutdown(final Process process) { }, "alluxio-process-shutdown-hook")); } + /** + * Outputs process critical information like metrics and jstack before it exits. + * This is synchronous in order to capture as much information at the scene as possible. + * The information will be output to separate files in the log directory. + */ + public static void dumpInformationOnExit() { + if (!COLLECT_ON_EXIT.contains(CommonUtils.PROCESS_TYPE.get())) { + LOG.info("Process type is {}, skip dumping metrics and thread stacks", + CommonUtils.PROCESS_TYPE.get()); + return; + } + if (Configuration.getBoolean(PropertyKey.EXIT_COLLECT_INFO)) { + synchronized (ProcessUtils.class) { + if (!sInfoDumpOnExitCheck) { + sInfoDumpOnExitCheck = true; + LOG.info("Logging metrics and jstack on {} exit...", CommonUtils.PROCESS_TYPE.get()); + try { + String logsDir = Configuration.getString(PropertyKey.LOGS_DIR); + String outputFilePrefix = "alluxio-" + + CommonUtils.PROCESS_TYPE.get().toString().toLowerCase() + "-exit"; + dumpMetrics(logsDir, outputFilePrefix); + dumpStacks(logsDir, outputFilePrefix); + } catch (Throwable t) { + LOG.error("Failed to dump metrics and jstacks", t); + } + } + } + } else { + LOG.info("Not logging metrics and jstack on exit, set {}=true to enable this feature", + PropertyKey.EXIT_COLLECT_INFO.getName()); + } + } + + /** + * Outputs process critical information like metrics and jstack before the primary master + * fails over to standby. This is asynchronous in order not to block the failover. + * The information will be output to separate files in the log directory. 
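The failover variant whose javadoc begins here submits the metrics and stack dumps to an executor and returns the `Future`s, so the failover itself is not blocked while a caller may still choose to wait. A simplified standalone sketch (print statements stand in for the real dump methods):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

/** Sketch of submitting independent dump tasks and returning their futures. */
public final class FailoverDumpExample {
  static List<Future<?>> dumpOnFailover(ExecutorService es) {
    List<Future<?>> futures = new ArrayList<>();
    // Metrics are dumped first so they are captured before later shutdown steps clear them.
    futures.add(es.submit(() -> System.out.println("dump metrics")));
    futures.add(es.submit(() -> System.out.println("dump thread stacks")));
    return futures;
  }

  public static void main(String[] args) throws Exception {
    ExecutorService es = Executors.newFixedThreadPool(2);
    for (Future<?> f : dumpOnFailover(es)) {
      f.get(); // a caller may wait, or let the dumps complete in the background
    }
    es.shutdown();
  }
}
```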
+ * + * @param es the thread pool to submit tasks to + * @return a list of futures for async info dumping jobs + */ + public static List> dumpInformationOnFailover(ExecutorService es) { + if (Configuration.getBoolean(PropertyKey.MASTER_FAILOVER_COLLECT_INFO)) { + LOG.info("Logging metrics and jstack when primary master switches to standby..."); + String logsDir = Configuration.getString(PropertyKey.LOGS_DIR); + String outputFilePrefix = "alluxio-" + + CommonUtils.PROCESS_TYPE.get().toString().toLowerCase() + "-failover"; + List> futures = new ArrayList<>(); + // Attempt to dump metrics first before MetricsMaster clears all metrics + // The failover procedure will shutdown RPC -> Journal -> Master components + // So we rely on the first two steps take longer than this thread + futures.add(es.submit(() -> { + ProcessUtils.dumpMetrics(logsDir, outputFilePrefix); + return null; + })); + futures.add(es.submit(() -> { + ProcessUtils.dumpStacks(logsDir, outputFilePrefix); + return null; + })); + LOG.info("Started dumping metrics and jstacks into {}", logsDir); + return futures; + } else { + LOG.info("Not logging information like metrics and jstack on failover, " + + "set {}=true to enable this feature", + PropertyKey.MASTER_FAILOVER_COLLECT_INFO.getName()); + return Collections.emptyList(); + } + } + + private static void dumpMetrics(String logsDir, String outputFilePrefix) { + Instant start = Instant.now(); + String childFilePath = String.format("%s-metrics-%s.json", + outputFilePrefix, DATETIME_FORMAT.format(start)); + File metricDumpFile = new File(logsDir, childFilePath); + try (FileOutputStream fos = new FileOutputStream(metricDumpFile, false)) { + // The metrics json string is ~100KB in size + String outputContents = OBJECT_MAPPER.writerWithDefaultPrettyPrinter() + .writeValueAsString(MetricsSystem.METRIC_REGISTRY); + fos.getChannel().write(ByteBuffer.wrap(outputContents.getBytes(StandardCharsets.UTF_8))); + } catch (IOException e) { + LOG.error("Failed to persist metrics to {}", metricDumpFile.getAbsolutePath(), e); + return; + } + Instant end = Instant.now(); + LOG.info("Dumped metrics of current process in {}ms to {}", + Duration.between(start, end).toMillis(), childFilePath); + } + + private static void dumpStacks(String logsDir, String outputFilePrefix) { + Instant start = Instant.now(); + String childFilePath = String.format("%s-stacks-%s.txt", + outputFilePrefix, DATETIME_FORMAT.format(start)); + File stacksDumpFile = new File(logsDir, childFilePath); + try (PrintStream stream = new PrintStream(stacksDumpFile)) { + // Dumping one thread produces <1KB + ThreadUtils.printThreadInfo(stream, "Dumping all threads in process"); + } catch (IOException e) { + LOG.error("Failed to persist thread stacks to {}", stacksDumpFile.getAbsolutePath(), e); + return; + } + Instant end = Instant.now(); + LOG.info("Dumped jstack of current process in {}ms to {}", + Duration.between(start, end).toMillis(), childFilePath); + } + private ProcessUtils() {} // prevent instantiation } diff --git a/core/server/common/src/main/java/alluxio/RestUtils.java b/core/server/common/src/main/java/alluxio/RestUtils.java index aafd4d8c2b17..714bc297b352 100644 --- a/core/server/common/src/main/java/alluxio/RestUtils.java +++ b/core/server/common/src/main/java/alluxio/RestUtils.java @@ -12,7 +12,6 @@ package alluxio; import alluxio.conf.AlluxioConfiguration; -import alluxio.conf.PropertyKey; import alluxio.exception.status.AlluxioStatusException; import alluxio.security.authentication.AuthenticatedClientUser; import 
alluxio.security.user.ServerUserState; @@ -119,10 +118,6 @@ private static Response createResponse(Object object, AlluxioConfiguration allux headers.forEach(rb::header); } - if (alluxioConf.getBoolean(PropertyKey.WEB_CORS_ENABLED)) { - return makeCORS(rb).build(); - } - return rb.build(); } @@ -175,43 +170,10 @@ private static Response createErrorResponse(Exception e, AlluxioConfiguration al ErrorResponse response = new ErrorResponse(se.getStatus().getCode(), se.getMessage()); Response.ResponseBuilder rb = Response.serverError().entity(response); - if (alluxioConf.getBoolean(PropertyKey.WEB_CORS_ENABLED)) { - return makeCORS(rb).build(); - } return rb.build(); } - /** - * Makes the responseBuilder CORS compatible. - * - * @param responseBuilder the response builder - * @param returnMethod the modified response builder - * @return response builder - */ - public static Response.ResponseBuilder makeCORS(Response.ResponseBuilder responseBuilder, - String returnMethod) { - // TODO(william): Make origin, methods, and headers configurable. - Response.ResponseBuilder rb = responseBuilder.header("Access-Control-Allow-Origin", "*") - .header("Access-Control-Allow-Methods", "GET, POST, OPTIONS"); - - if (!"".equals(returnMethod)) { - rb.header("Access-Control-Allow-Headers", returnMethod); - } - - return rb; - } - - /** - * Makes the responseBuilder CORS compatible, assumes default methods. - * - * @param responseBuilder the modified response builder - * @return response builder - */ - public static Response.ResponseBuilder makeCORS(Response.ResponseBuilder responseBuilder) { - return makeCORS(responseBuilder, ""); - } - private RestUtils() { } // prevent instantiation } diff --git a/core/server/common/src/main/java/alluxio/RpcUtils.java b/core/server/common/src/main/java/alluxio/RpcUtils.java index b4698c111c46..e77a16d9e638 100644 --- a/core/server/common/src/main/java/alluxio/RpcUtils.java +++ b/core/server/common/src/main/java/alluxio/RpcUtils.java @@ -21,6 +21,7 @@ import alluxio.metrics.MetricsSystem; import alluxio.security.User; import alluxio.security.authentication.AuthenticatedClientUser; +import alluxio.security.authentication.ClientContextServerInjector; import com.codahale.metrics.Timer; import io.grpc.StatusException; @@ -121,7 +122,10 @@ public static T callAndReturn(Logger logger, RpcCallableThrowsIOException MetricsSystem.timer(MetricKey.MASTER_TOTAL_RPCS.getName()), MetricsSystem.timer(getQualifiedMetricName(methodName)))) { MetricsSystem.counter(getQualifiedInProgressMetricName(methodName)).inc(); - logger.debug("Enter: {}: {}", methodName, debugDesc); + logger.debug("Enter: {} from {}: {} client version: {}", methodName, + ClientContextServerInjector.getIpAddress(), + ClientContextServerInjector.getClientVersion(), + debugDesc); T res = callable.call(); logger.debug("Exit: {}: {}", methodName, debugDesc); return res; diff --git a/core/server/common/src/main/java/alluxio/executor/ExecutorServiceBuilder.java b/core/server/common/src/main/java/alluxio/executor/ExecutorServiceBuilder.java index d1633848b7fb..a040c776156e 100644 --- a/core/server/common/src/main/java/alluxio/executor/ExecutorServiceBuilder.java +++ b/core/server/common/src/main/java/alluxio/executor/ExecutorServiceBuilder.java @@ -17,6 +17,7 @@ import alluxio.master.AlluxioExecutorService; import alluxio.util.ThreadFactoryUtils; +import com.codahale.metrics.Counter; import com.google.common.base.Preconditions; import java.util.concurrent.ArrayBlockingQueue; @@ -26,6 +27,7 @@ import 
java.util.concurrent.SynchronousQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import javax.annotation.Nullable; /** * Used to create {@link ExecutorService} instances dynamically by configuration. @@ -38,6 +40,18 @@ public class ExecutorServiceBuilder { * @return instance of {@link ExecutorService} */ public static AlluxioExecutorService buildExecutorService(RpcExecutorHost executorHost) { + return buildExecutorService(executorHost, null); + } + + /** + * Creates an {@link ExecutorService} for given Alluxio process dynamically by configuration. + * + * @param executorHost Where the executor is needed + * @param rpcCounter the counter to track ongoing RPC + * @return instance of {@link ExecutorService} + */ + public static AlluxioExecutorService buildExecutorService( + RpcExecutorHost executorHost, @Nullable Counter rpcCounter) { // Get executor type for given host. RpcExecutorType executorType = Configuration.getEnum( PropertyKey.Template.RPC_EXECUTOR_TYPE.format(executorHost.toString()), @@ -123,7 +137,7 @@ public static AlluxioExecutorService buildExecutorService(RpcExecutorHost execut // Post settings. ((ThreadPoolExecutor) executorService).allowCoreThreadTimeOut(allowCoreThreadsTimeout); } - return new AlluxioExecutorService(executorService); + return new AlluxioExecutorService(executorService, rpcCounter); } /** diff --git a/core/server/common/src/main/java/alluxio/master/AbstractMaster.java b/core/server/common/src/main/java/alluxio/master/AbstractMaster.java index 0850f1166996..bc4a88862b40 100644 --- a/core/server/common/src/main/java/alluxio/master/AbstractMaster.java +++ b/core/server/common/src/main/java/alluxio/master/AbstractMaster.java @@ -47,7 +47,7 @@ public abstract class AbstractMaster implements Master { /** The executor used for running maintenance threads for the master. */ private ExecutorService mExecutorService; /** A handler to the journal for this master. */ - private final Journal mJournal; + protected final Journal mJournal; /** true if this master is in primary mode, and not standby mode. */ private boolean mIsPrimary = false; diff --git a/core/server/common/src/main/java/alluxio/master/AlluxioExecutorService.java b/core/server/common/src/main/java/alluxio/master/AlluxioExecutorService.java index a18c82554ca8..d12f4cb5437b 100644 --- a/core/server/common/src/main/java/alluxio/master/AlluxioExecutorService.java +++ b/core/server/common/src/main/java/alluxio/master/AlluxioExecutorService.java @@ -13,21 +13,26 @@ import alluxio.concurrent.jsr.ForkJoinPool; +import com.codahale.metrics.Counter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.util.Collection; import java.util.List; import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; /** * Forwarder over ExecutorService interface for exposing internal queue length. */ public class AlluxioExecutorService implements ExecutorService { + private static final Logger LOG = LoggerFactory.getLogger(AlluxioExecutorService.class); + private ExecutorService mExecutor; + private final Counter mRpcTracker; /** * Creates Alluxio ExecutorService wrapper. 
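The executor wrapper changed in the hunks that follow brackets each submit/execute with `Counter.inc()`/`dec()` and logs a warning at shutdown when the counter is still positive. A standalone sketch of the same idea using a plain `AtomicLong`; note that, unlike the hand-off bracketing above, this variant deliberately holds the count for the full lifetime of the task so that shutdown reports genuinely unfinished work:

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

/** Sketch of an executor wrapper that tracks outstanding work with a counter. */
public final class CountingExecutorExample {
  private final ExecutorService mDelegate = Executors.newFixedThreadPool(4);
  private final AtomicLong mInFlight = new AtomicLong();

  void execute(Runnable task) {
    mInFlight.incrementAndGet();
    // The decrement happens when the task finishes, not when it is handed off.
    mDelegate.execute(() -> {
      try {
        task.run();
      } finally {
        mInFlight.decrementAndGet();
      }
    });
  }

  void shutdown() throws InterruptedException {
    long outstanding = mInFlight.get();
    if (outstanding > 0) {
      System.out.println(outstanding + " operations have not completed");
    }
    mDelegate.shutdown();
    mDelegate.awaitTermination(5, TimeUnit.SECONDS);
  }

  public static void main(String[] args) throws InterruptedException {
    CountingExecutorExample ex = new CountingExecutorExample();
    ex.execute(() -> System.out.println("task done"));
    ex.shutdown();
  }
}
```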
@@ -36,6 +41,18 @@ public class AlluxioExecutorService implements ExecutorService { */ public AlluxioExecutorService(ExecutorService executor) { mExecutor = executor; + mRpcTracker = null; + } + + /** + * Creates Alluxio ExecutorService wrapper. + * + * @param executor underlying executor + * @param counter the counter to track active operations + */ + public AlluxioExecutorService(ExecutorService executor, Counter counter) { + mExecutor = executor; + mRpcTracker = counter; } /** @@ -82,11 +99,23 @@ public long getPoolSize() { @Override public void shutdown() { + if (mRpcTracker != null) { + long activeRpcCount = mRpcTracker.getCount(); + if (activeRpcCount > 0) { + LOG.warn("{} operations have not completed", activeRpcCount); + } + } mExecutor.shutdown(); } @Override public List<Runnable> shutdownNow() { + if (mRpcTracker != null) { + long activeRpcCount = mRpcTracker.getCount(); + if (activeRpcCount > 0) { + LOG.warn("{} operations have not completed", activeRpcCount); + } + } return mExecutor.shutdownNow(); } @@ -107,45 +136,106 @@ public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedE @Override public <T> Future<T> submit(Callable<T> task) { - return mExecutor.submit(task); + if (mRpcTracker != null) { + mRpcTracker.inc(); + LOG.trace("Inc from rpc server in submit(Callable)"); + } + try { + return mExecutor.submit(task); + } finally { + if (mRpcTracker != null) { + mRpcTracker.dec(); + } + } } @Override public <T> Future<T> submit(Runnable task, T result) { - return mExecutor.submit(task, result); + if (mRpcTracker != null) { + mRpcTracker.inc(); + LOG.trace("Inc from rpc server in submit(Runnable,T)"); + } + try { + return mExecutor.submit(task, result); + } finally { + if (mRpcTracker != null) { + mRpcTracker.dec(); + } + } } @Override public Future<?> submit(Runnable task) { - return mExecutor.submit(task); + if (mRpcTracker != null) { + mRpcTracker.inc(); + LOG.trace("Inc from rpc server in submit(Runnable)"); + } + try { + return mExecutor.submit(task); + } finally { + if (mRpcTracker != null) { + mRpcTracker.dec(); + } + } } @Override public <T> List<Future<T>> invokeAll(Collection<? extends Callable<T>> tasks) throws InterruptedException { - return mExecutor.invokeAll(tasks); + if (mRpcTracker != null) { + mRpcTracker.inc(); + LOG.trace("Inc from rpc server in invokeAll(Collection)"); + } + try { + return mExecutor.invokeAll(tasks); + } finally { + if (mRpcTracker != null) { + mRpcTracker.dec(); + } + } } @Override public <T> List<Future<T>> invokeAll(Collection<? extends Callable<T>> tasks, long timeout, TimeUnit unit) throws InterruptedException { - return mExecutor.invokeAll(tasks, timeout, unit); + if (mRpcTracker != null) { + mRpcTracker.inc(); + LOG.trace("Inc from rpc server in invokeAll(Collection,long,TimeUnit)"); + } + try { + return mExecutor.invokeAll(tasks, timeout, unit); + } finally { + if (mRpcTracker != null) { + mRpcTracker.dec(); + } + } } @Override - public <T> T invokeAny(Collection<? extends Callable<T>> tasks) - throws InterruptedException, ExecutionException { - return null; + public <T> T invokeAny(Collection<? extends Callable<T>> tasks) { + // Not used. Also, keeping the active counter accurate here is hard, so we do not support it. + throw new UnsupportedOperationException("invokeAny(Collection) is not supported"); } @Override - public <T> T invokeAny(Collection<? extends Callable<T>> tasks, long timeout, TimeUnit unit) - throws InterruptedException, ExecutionException, TimeoutException { - return mExecutor.invokeAny(tasks, timeout, unit); + public <T> T invokeAny(Collection<? extends Callable<T>> tasks, long timeout, TimeUnit unit) { + // Not used. Also, keeping the active counter accurate here is hard, so we do not support it.
+ throw new UnsupportedOperationException( + "invokeAny(Collection,long,TimeUnit) is not supported"); } @Override public void execute(Runnable command) { - mExecutor.execute(command); + if (mRpcTracker != null) { + mRpcTracker.inc(); + LOG.trace("Inc from rpc server in execute(Runnable)"); + } + try { + mExecutor.execute(command); + } finally { + if (mRpcTracker != null) { + mRpcTracker.dec(); + } + } } } diff --git a/core/server/common/src/main/java/alluxio/master/Master.java b/core/server/common/src/main/java/alluxio/master/Master.java index 1e475973c7df..13858c22ae92 100644 --- a/core/server/common/src/main/java/alluxio/master/Master.java +++ b/core/server/common/src/main/java/alluxio/master/Master.java @@ -13,9 +13,14 @@ import alluxio.Server; import alluxio.exception.status.UnavailableException; +import alluxio.grpc.GrpcService; +import alluxio.grpc.ServiceType; import alluxio.master.journal.JournalContext; import alluxio.master.journal.Journaled; +import java.util.Collections; +import java.util.Map; + /** * This interface contains common operations for all masters. */ @@ -29,4 +34,12 @@ public interface Master extends Journaled, Server { * @return a master context */ MasterContext getMasterContext(); + + /** + * @return a map from service names to gRPC services that serve RPCs for this master + * when it is a standby master. + */ + default Map<ServiceType, GrpcService> getStandbyServices() { + return Collections.emptyMap(); + } } diff --git a/core/server/common/src/main/java/alluxio/master/StateLockManager.java b/core/server/common/src/main/java/alluxio/master/StateLockManager.java index 25baa80a74be..8c5be7096aff 100644 --- a/core/server/common/src/main/java/alluxio/master/StateLockManager.java +++ b/core/server/common/src/main/java/alluxio/master/StateLockManager.java @@ -28,15 +28,20 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.Date; import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.LongAdder; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -59,9 +64,10 @@ public class StateLockManager { /** The state-lock. */ private ReentrantReadWriteLock mStateLock = new ReentrantReadWriteLock(true); - /** The set of threads that are waiting for or holding the state-lock in shared mode. */ private Set<Thread> mSharedWaitersAndHolders; + /** Stores the name of each thread that is taking locks. */ + private Map<String, LongAdder> mSharedLockHolders = new ConcurrentHashMap<>(); /** Scheduler that is used for interrupt-cycle. */ private ScheduledExecutorService mScheduler; @@ -77,6 +83,11 @@ public class StateLockManager { private ScheduledFuture<?> mInterrupterFuture; /** Whether interrupt-cycle is entered. */ private AtomicBoolean mInterruptCycleTicking = new AtomicBoolean(false); + /** + * Threshold above which to log that a thread has acquired the shared state lock + * many times, which indicates deep recursion. + */ + private int mLogThreshold = Configuration.getInt(PropertyKey.MASTER_STATE_LOCK_ERROR_THRESHOLD); /** This is the deadline for forcing the lock.
*/ private long mForcedDurationMs; @@ -143,7 +154,7 @@ public LockResource lockShared() throws InterruptedException { final int readLockCount = mStateLock.getReadLockCount(); if (readLockCount > READ_LOCK_COUNT_HIGH) { SAMPLING_LOG.info("Read Lock Count Too High: {} {}", readLockCount, - mSharedWaitersAndHolders); + mSharedLockHolders); } } @@ -158,12 +169,29 @@ public LockResource lockShared() throws InterruptedException { } // Register thread for interrupt cycle. mSharedWaitersAndHolders.add(Thread.currentThread()); + String threadName = Thread.currentThread().getName(); + mSharedLockHolders.computeIfAbsent(threadName, k -> new LongAdder()).increment(); + if (mSharedLockHolders.get(threadName).longValue() > mLogThreshold) { + Exception e = new Exception("Thread recursion is deeper than " + mLogThreshold); + LOG.warn("Current thread is {}. All state lock holders are {}", + threadName, mSharedLockHolders, e); + } // Grab the lock interruptibly. mStateLock.readLock().lockInterruptibly(); // Return the resource. // Register an action to remove the thread from holders registry before releasing the lock. return new LockResource(mStateLock.readLock(), false, false, () -> { - mSharedWaitersAndHolders.remove(Thread.currentThread()); + // This is invoked in the same thread at the end of try-with-resource + Thread removedFrom = Thread.currentThread(); + mSharedLockHolders.computeIfPresent(removedFrom.getName(), (k, v) -> { + mSharedWaitersAndHolders.remove(Thread.currentThread()); + if (v.longValue() <= 1L) { + return null; + } else { + v.decrement(); + return v; + } + }); }); } @@ -233,9 +261,8 @@ public LockResource lockExclusive(StateLockOptions lockOptions, activateInterruptCycle(); // Force the lock. LOG.info("Thread-{} forcing the lock with {} waiters/holders: {}", - ThreadUtils.getCurrentThreadIdentifier(), mSharedWaitersAndHolders.size(), - mSharedWaitersAndHolders.stream().map((th) -> Long.toString(th.getId())) - .collect(Collectors.joining(","))); + ThreadUtils.getCurrentThreadIdentifier(), mSharedLockHolders.size(), + mSharedLockHolders); try { if (beforeAttempt != null) { beforeAttempt.run(); @@ -262,13 +289,8 @@ public LockResource lockExclusive(StateLockOptions lockOptions, /** * @return the thread identifiers that are waiting for or holding the shared lock */ - public List<String> getSharedWaitersAndHolders() { - List<String> result = new ArrayList<>(); - - for (Thread waiterOrHolder : mSharedWaitersAndHolders) { - result.add(ThreadUtils.getThreadIdentifier(waiterOrHolder)); - } - return result; + public Collection<String> getSharedWaitersAndHolders() { + return Collections.unmodifiableSet(mSharedLockHolders.keySet()); } /** diff --git a/core/server/common/src/main/java/alluxio/master/journal/AbstractJournalSystem.java b/core/server/common/src/main/java/alluxio/master/journal/AbstractJournalSystem.java index 31ee1d00a96d..871e3d501642 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/AbstractJournalSystem.java +++ b/core/server/common/src/main/java/alluxio/master/journal/AbstractJournalSystem.java @@ -49,7 +49,7 @@ public synchronized void start() { @Override public synchronized void stop() { - Preconditions.checkState(mRunning, "Journal is not running"); + Preconditions.checkState(mRunning, "Journal is not running: %s", this); mAllJournalSinks.forEach(JournalSink::beforeShutdown); mRunning = false; stopInternal();
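The holder bookkeeping above boils down to a small per-thread reentrancy-counting pattern. A self-contained sketch, with illustrative names rather than the actual Alluxio classes:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.LongAdder;

// Sketch: per-thread reentrancy accounting, mirroring mSharedLockHolders above.
class ReentrancyTracker {
  private final Map<String, LongAdder> mHolders = new ConcurrentHashMap<>();

  void acquired() { // call after taking the shared lock
    mHolders.computeIfAbsent(Thread.currentThread().getName(), k -> new LongAdder())
        .increment();
  }

  void released() { // call when releasing the shared lock
    mHolders.computeIfPresent(Thread.currentThread().getName(), (k, v) -> {
      if (v.longValue() <= 1L) {
        return null; // last hold by this thread: drop the map entry
      }
      v.decrement();
      return v;
    });
  }
}

diff --git a/core/server/common/src/main/java/alluxio/master/journal/DelegatingJournaled.java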
b/core/server/common/src/main/java/alluxio/master/journal/DelegatingJournaled.java index 2c5523fbff31..fdf278d8aa67 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/DelegatingJournaled.java +++ b/core/server/common/src/main/java/alluxio/master/journal/DelegatingJournaled.java @@ -16,8 +16,11 @@ import alluxio.proto.journal.Journal.JournalEntry; import alluxio.resource.CloseableIterator; +import java.io.File; import java.io.IOException; import java.io.OutputStream; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; import java.util.function.Supplier; /** @@ -45,11 +48,23 @@ default CheckpointName getCheckpointName() { return getDelegate().getCheckpointName(); } + @Override + default CompletableFuture<Void> writeToCheckpoint(File directory, + ExecutorService executorService) { + return getDelegate().writeToCheckpoint(directory, executorService); + } + @Override default void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { getDelegate().writeToCheckpoint(output); } + @Override + default CompletableFuture<Void> restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return getDelegate().restoreFromCheckpoint(directory, executorService); + } + @Override default void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { getDelegate().restoreFromCheckpoint(input); diff --git a/core/server/common/src/main/java/alluxio/master/journal/FileSystemMergeJournalContext.java b/core/server/common/src/main/java/alluxio/master/journal/FileSystemMergeJournalContext.java index 4a89953bf136..450c25302a7d 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/FileSystemMergeJournalContext.java +++ b/core/server/common/src/main/java/alluxio/master/journal/FileSystemMergeJournalContext.java @@ -11,12 +11,10 @@ package alluxio.master.journal; -import alluxio.Constants; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.status.UnavailableException; import alluxio.proto.journal.Journal.JournalEntry; -import alluxio.util.logging.SamplingLogger; import com.google.common.base.Preconditions; import org.slf4j.Logger; @@ -42,10 +40,9 @@ public class FileSystemMergeJournalContext implements JournalContext { = Configuration.getInt( PropertyKey.MASTER_MERGE_JOURNAL_CONTEXT_NUM_ENTRIES_LOGGING_THRESHOLD); - private static final Logger SAMPLING_LOG = new SamplingLogger( - LoggerFactory.getLogger(FileSystemMergeJournalContext.class), 30L * Constants.SECOND_MS); + private static final Logger LOG = LoggerFactory.getLogger(FileSystemMergeJournalContext.class); - private final JournalContext mJournalContext; + protected final JournalContext mJournalContext; protected final JournalEntryMerger mJournalEntryMerger; /** @@ -77,8 +74,12 @@ public synchronized void append(JournalEntry entry) { mJournalEntryMerger.add(entry); List<JournalEntry> journalEntries = mJournalEntryMerger.getMergedJournalEntries(); if (journalEntries.size() >= MAX_LOGGING_ENTRIES) { - SAMPLING_LOG.warn("MergeJournalContext has " + journalEntries.size() - + " entries, over the limit of " + MAX_LOGGING_ENTRIES); + LOG.warn("MergeJournalContext has " + journalEntries.size() + + " entries, over the limit of " + MAX_LOGGING_ENTRIES + + ", forcefully merging journal entries and adding them to the async journal writer" + + "\n Journal Entry: \n" + + entry, new Exception("MergeJournalContext Stacktrace:")); + appendMergedJournals(); } }
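The forced-merge guard above follows a common cap-and-flush buffering shape. A minimal standalone sketch, assuming nothing about the real JournalContext API (all names below are illustrative):

import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;

// Sketch: buffer entries, and flush eagerly once a size cap is hit, as the
// patched append() above does instead of only logging a warning.
class CapAndFlushBuffer {
  private static final int MAX_BUFFERED_ENTRIES = 100;
  private final List<String> mBuffer = new ArrayList<>();
  private final Consumer<List<String>> mDownstream;

  CapAndFlushBuffer(Consumer<List<String>> downstream) {
    mDownstream = downstream;
  }

  synchronized void append(String entry) {
    mBuffer.add(entry);
    if (mBuffer.size() >= MAX_BUFFERED_ENTRIES) {
      flush();
    }
  }

  synchronized void flush() {
    if (!mBuffer.isEmpty()) {
      mDownstream.accept(new ArrayList<>(mBuffer));
      mBuffer.clear();
    }
  }
}

diff --git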
a/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java b/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java index 3cdfefa784f8..03350192d481 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java +++ b/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java @@ -44,7 +44,9 @@ public static String getMasterForEntry(JournalEntry entry) { || entry.hasUpdateUfsMode() || entry.hasUpdateInode() || entry.hasUpdateInodeDirectory() - || entry.hasUpdateInodeFile()) { + || entry.hasUpdateInodeFile() + || entry.hasLoadJob() + || entry.hasCopyJob()) { return Constants.FILE_SYSTEM_MASTER_NAME; } if (entry.hasBlockContainerIdGenerator() diff --git a/core/server/common/src/main/java/alluxio/master/journal/JournaledGroup.java b/core/server/common/src/main/java/alluxio/master/journal/JournaledGroup.java index b90c0003a4cd..bc444dacee53 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/JournaledGroup.java +++ b/core/server/common/src/main/java/alluxio/master/journal/JournaledGroup.java @@ -19,9 +19,12 @@ import com.google.common.collect.Lists; +import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; /** * Convenience class which groups together multiple Journaled components as a single Journaled @@ -70,11 +73,27 @@ public CheckpointName getCheckpointName() { return mCheckpointName; } + @Override + public CompletableFuture writeToCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.allOf(mJournaled.stream() + .map(journaled -> journaled.writeToCheckpoint(directory, executorService)) + .toArray(CompletableFuture[]::new)); + } + @Override public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { JournalUtils.writeToCheckpoint(output, mJournaled); } + @Override + public CompletableFuture restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.allOf(mJournaled.stream() + .map(journaled -> journaled.restoreFromCheckpoint(directory, executorService)) + .toArray(CompletableFuture[]::new)); + } + @Override public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { JournalUtils.restoreFromCheckpoint(input, mJournaled); diff --git a/core/server/common/src/main/java/alluxio/master/journal/MetadataSyncMergeJournalContext.java b/core/server/common/src/main/java/alluxio/master/journal/MetadataSyncMergeJournalContext.java index d3dfcc679dda..6099592b8a89 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/MetadataSyncMergeJournalContext.java +++ b/core/server/common/src/main/java/alluxio/master/journal/MetadataSyncMergeJournalContext.java @@ -11,6 +11,8 @@ package alluxio.master.journal; +import alluxio.exception.status.UnavailableException; + import com.google.common.annotations.VisibleForTesting; import javax.annotation.concurrent.NotThreadSafe; @@ -43,6 +45,9 @@ public MetadataSyncMergeJournalContext( super(journalContext, journalEntryMerger); } + /** + * Flushes the journals into the async journal writer. + */ @Override public void flush() { appendMergedJournals(); @@ -55,6 +60,14 @@ public void close() { // the rpc thread. } + /** + * Flushes and commits journals. 
+ */ + public void hardFlush() throws UnavailableException { + appendMergedJournals(); + mJournalContext.flush(); + } + /** * @return the journal merger, used in unit test */ diff --git a/core/server/common/src/main/java/alluxio/master/journal/NoopJournaled.java b/core/server/common/src/main/java/alluxio/master/journal/NoopJournaled.java index ebcf77b7c83a..410454c51aec 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/NoopJournaled.java +++ b/core/server/common/src/main/java/alluxio/master/journal/NoopJournaled.java @@ -18,9 +18,12 @@ import alluxio.proto.journal.Journal.JournalEntry; import alluxio.resource.CloseableIterator; +import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.util.Collections; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; /** * Interface providing default implementations which do nothing. @@ -40,6 +43,12 @@ default CheckpointName getCheckpointName() { return CheckpointName.NOOP; } + @Override + default CompletableFuture<Void> writeToCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.completedFuture(null); + } + @Override default void writeToCheckpoint(OutputStream output) throws IOException { // Just write a checkpoint type with no data. The stream constructor writes unbuffered to the @@ -47,6 +56,12 @@ default void writeToCheckpoint(OutputStream output) throws IOException { new CheckpointOutputStream(output, CheckpointType.JOURNAL_ENTRY); } + @Override + default CompletableFuture<Void> restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.completedFuture(null); + } + @Override default void restoreFromCheckpoint(CheckpointInputStream input) { }
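The directory-based methods JournaledGroup gained above use a fan-out/join idiom that is worth seeing in isolation. A sketch against the Checkpointed interface introduced later in this patch (the helper name is an assumption):

import java.io.File;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;

// Sketch: start one asynchronous checkpoint per component and join them into
// a single future that completes when every component has been written.
static CompletableFuture<Void> checkpointAll(List<? extends Checkpointed> components,
    File directory, ExecutorService pool) {
  return CompletableFuture.allOf(components.stream()
      .map(c -> c.writeToCheckpoint(directory, pool))
      .toArray(CompletableFuture[]::new));
}

diff --git a/core/server/common/src/main/java/alluxio/master/journal/SingleEntryJournaled.java b/core/server/common/src/main/java/alluxio/master/journal/SingleEntryJournaled.java new file mode 100644 index 000000000000..7e4c224ade9a --- /dev/null +++ b/core/server/common/src/main/java/alluxio/master/journal/SingleEntryJournaled.java @@ -0,0 +1,53 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal; + +import alluxio.proto.journal.Journal; +import alluxio.resource.CloseableIterator; +import alluxio.util.CommonUtils; + +/** + * Journaled component responsible for journaling a single journal entry.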
+ */ +public abstract class SingleEntryJournaled implements Journaled { + + private Journal.JournalEntry mEntry = Journal.JournalEntry.getDefaultInstance(); + + @Override + public CloseableIterator getJournalEntryIterator() { + return CloseableIterator.noopCloseable(CommonUtils.singleElementIterator(mEntry)); + } + + @Override + public boolean processJournalEntry(Journal.JournalEntry entry) { + if (!Journal.JournalEntry.getDefaultInstance().equals(mEntry)) { + LOG.warn("{} has already processed an entry", getCheckpointName()); + } + mEntry = entry; + return true; + } + + @Override + public void resetState() { + mEntry = Journal.JournalEntry.getDefaultInstance(); + } + + /** + * @return the entry stored by this object + */ + public Journal.JournalEntry getEntry() { + if (Journal.JournalEntry.getDefaultInstance().equals(mEntry)) { + LOG.warn("{} has not processed any entries", getCheckpointName()); + } + return mEntry; + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointInputStream.java b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointInputStream.java index 8c09303b7d0b..14eabc267c20 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointInputStream.java +++ b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointInputStream.java @@ -28,7 +28,7 @@ * * @see CheckpointOutputStream */ -public final class CheckpointInputStream extends DataInputStream { +public class CheckpointInputStream extends DataInputStream { private static final Logger LOG = LoggerFactory.getLogger(CheckpointInputStream.class); private final CheckpointType mType; diff --git a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java index 490768dd6648..8870f7ec5d4f 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java +++ b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java @@ -18,6 +18,7 @@ public enum CheckpointName { ACTIVE_SYNC_MANAGER, BLOCK_MASTER, + BLOCK_MASTER_CONTAINER_ID, CACHING_INODE_STORE, CLUSTER_INFO, FILE_SYSTEM_MASTER, @@ -39,4 +40,6 @@ public enum CheckpointName { TABLE_MASTER_TRANSFORM_MANAGER, TO_BE_PERSISTED_FILE_IDS, TTL_BUCKET_LIST, + SCHEDULER, + SNAPSHOT_ID, } diff --git a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/Checkpointed.java b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/Checkpointed.java index 7ba2f5098433..d381587d5851 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/Checkpointed.java +++ b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/Checkpointed.java @@ -11,18 +11,59 @@ package alluxio.master.journal.checkpoint; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.grpc.ErrorType; + +import io.grpc.Status; +import org.apache.ratis.io.MD5Hash; +import org.apache.ratis.util.MD5FileUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; import java.io.IOException; import java.io.OutputStream; +import java.security.MessageDigest; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; /** * Base class for Alluxio classes which can be written to and read from metadata checkpoints. 
*/ public interface Checkpointed { + Logger LOG = LoggerFactory.getLogger(Checkpointed.class); /** * @return a name for this checkpointed class */ CheckpointName getCheckpointName(); + /** + * Writes a checkpoint to the specified directory asynchronously using the provided executor. + * + * @param directory where the checkpoint will be written + * @param executorService to use when running tasks asynchronously + * @return a future that processes the computation + */ + default CompletableFuture writeToCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.runAsync(() -> { + LOG.debug("taking {} snapshot started", getCheckpointName()); + File file = new File(directory, getCheckpointName().toString()); + try { + MessageDigest md5 = MD5Hash.getDigester(); + try (OutputStream outputStream = new OptimizedCheckpointOutputStream(file, md5)) { + writeToCheckpoint(outputStream); + } + MD5FileUtil.saveMD5File(file, new MD5Hash(md5.digest())); + } catch (Exception e) { + throw new AlluxioRuntimeException(Status.INTERNAL, + String.format("Failed to take snapshot %s", getCheckpointName()), + e, ErrorType.Internal, false); + } + LOG.debug("taking {} snapshot finished", getCheckpointName()); + }, executorService); + } + /** * Writes a checkpoint of all state to the given output stream. * @@ -33,6 +74,32 @@ public interface Checkpointed { */ void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException; + /** + * Restores state from a checkpoint asynchronously. + * @param directory where the checkpoint will be located + * @param executorService to use when running asynchronous tasks + * @return a future to track the progress + */ + default CompletableFuture restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.runAsync(() -> { + LOG.debug("loading {} snapshot started", getCheckpointName()); + File file = new File(directory, getCheckpointName().toString()); + try { + MessageDigest md5 = MD5Hash.getDigester(); + try (CheckpointInputStream is = new OptimizedCheckpointInputStream(file, md5)) { + restoreFromCheckpoint(is); + } + MD5FileUtil.verifySavedMD5(file, new MD5Hash(md5.digest())); + } catch (Exception e) { + throw new AlluxioRuntimeException(Status.INTERNAL, + String.format("Failed to restore snapshot %s", getCheckpointName()), + e, ErrorType.Internal, false); + } + LOG.debug("loading {} snapshot finished", getCheckpointName()); + }, executorService); + } + /** * Restores state from a checkpoint. * diff --git a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointInputStream.java b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointInputStream.java new file mode 100644 index 000000000000..a29e4390c003 --- /dev/null +++ b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointInputStream.java @@ -0,0 +1,38 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.journal.checkpoint; + +import net.jpountz.lz4.LZ4FrameInputStream; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.security.DigestInputStream; +import java.security.MessageDigest; + +/** + * InputStream to read checkpoint files efficiently. + */ +public class OptimizedCheckpointInputStream extends CheckpointInputStream { + + /** + * @param file where the checkpoint will be read from + * @param digest that verifies the file has not been corrupted + * @throws IOException propagates wrapped input stream exceptions + */ + public OptimizedCheckpointInputStream(File file, MessageDigest digest) throws IOException { + super(new DigestInputStream(new LZ4FrameInputStream( + new BufferedInputStream(Files.newInputStream(file.toPath()), + OptimizedCheckpointOutputStream.BUFFER_SIZE)), digest)); + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointOutputStream.java b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointOutputStream.java new file mode 100644 index 000000000000..f010e020eb08 --- /dev/null +++ b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointOutputStream.java @@ -0,0 +1,65 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal.checkpoint; + +import alluxio.util.FormatUtils; + +import net.jpountz.lz4.LZ4FrameOutputStream; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.security.DigestOutputStream; +import java.security.MessageDigest; + +/** + * OutputStream to write checkpoint files efficiently. + */ +public class OptimizedCheckpointOutputStream extends OutputStream { + public static final int BUFFER_SIZE = (int) FormatUtils.parseSpaceSize("4MB"); + + private final OutputStream mOutputStream; + + /** + * @param file where the checkpoint will be written + * @param digest to ensure uncorrupted data + * @throws IOException propagates wrapped output stream exceptions + */ + public OptimizedCheckpointOutputStream(File file, MessageDigest digest) throws IOException { + this(file, digest, BUFFER_SIZE); + } + + /** + * Constructor used for benchmarking. 
+ * @param file where the checkpoint will be written + * @param digest to ensure uncorrupted data + * @param bufferSize the buffer size that the output stream should use + */ + public OptimizedCheckpointOutputStream(File file, MessageDigest digest, int bufferSize) + throws IOException { + mOutputStream = new DigestOutputStream(new LZ4FrameOutputStream( + new BufferedOutputStream(Files.newOutputStream(file.toPath()), bufferSize)), + digest); + } + + @Override + public void write(int b) throws IOException { + mOutputStream.write(b); + } + + @Override + public void close() throws IOException { + mOutputStream.close(); + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/JournalStateMachine.java b/core/server/common/src/main/java/alluxio/master/journal/raft/JournalStateMachine.java index a96f69717490..f8f57ac0f697 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/JournalStateMachine.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/JournalStateMachine.java @@ -24,22 +24,19 @@ import alluxio.master.journal.CatchupFuture; import alluxio.master.journal.JournalUtils; import alluxio.master.journal.Journaled; +import alluxio.master.journal.SingleEntryJournaled; import alluxio.master.journal.checkpoint.CheckpointInputStream; import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; import alluxio.proto.journal.Journal.JournalEntry; import alluxio.resource.LockResource; -import alluxio.util.FormatUtils; -import alluxio.util.LogUtils; import alluxio.util.StreamUtils; import alluxio.util.logging.SamplingLogger; import com.codahale.metrics.Timer; import com.google.common.base.Preconditions; -import org.apache.ratis.io.MD5Hash; import org.apache.ratis.proto.RaftProtos; import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftGroup; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftGroupMemberId; import org.apache.ratis.protocol.RaftPeerId; @@ -52,19 +49,17 @@ import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.statemachine.impl.BaseStateMachine; import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; import org.apache.ratis.util.LifeCycle; -import org.apache.ratis.util.MD5FileUtil; +import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.DataInputStream; -import java.io.DataOutputStream; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; +import java.time.Duration; +import java.time.Instant; import java.util.Collection; import java.util.List; import java.util.Map; @@ -72,10 +67,9 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutorService; -import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicReference; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Stream; import javax.annotation.concurrent.GuardedBy; import javax.annotation.concurrent.ThreadSafe; @@ -101,7 +95,8 @@ public class JournalStateMachine extends BaseStateMachine {
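Putting the two optimized streams together, a checkpoint round trip looks roughly like the sketch below; dir and payload are placeholders. Note that the output stream only overrides write(int), so bulk writes fall back to OutputStream's byte-at-a-time default, with the BufferedOutputStream underneath absorbing most of the cost. The MD5Hash/MD5FileUtil helpers are the Ratis utilities already used elsewhere in this patch.

// Sketch: write an LZ4-compressed checkpoint plus its MD5 sidecar, then
// verify the digest while reading it back.
File file = new File(dir, "CHECKPOINT"); // placeholder name
MessageDigest writeMd5 = MD5Hash.getDigester();
try (OutputStream out = new OptimizedCheckpointOutputStream(file, writeMd5)) {
  out.write(payload);
}
MD5FileUtil.saveMD5File(file, new MD5Hash(writeMd5.digest()));

MessageDigest readMd5 = MD5Hash.getDigester();
try (CheckpointInputStream in = new OptimizedCheckpointInputStream(file, readMd5)) {
  // ... replay the checkpoint contents ...
}
MD5FileUtil.verifySavedMD5(file, new MD5Hash(readMd5.digest()));

/** Journals managed by this applier.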
*/ private final Map mJournals; private final RaftJournalSystem mJournalSystem; - private final SnapshotReplicationManager mSnapshotManager; + private final RaftSnapshotManager mSnapshotManager; + private final SnapshotDirStateMachineStorage mStorage; private final AtomicReference mStateLockManagerRef = new AtomicReference<>(null); @GuardedBy("this") @@ -109,10 +104,6 @@ public class JournalStateMachine extends BaseStateMachine { @GuardedBy("this") private boolean mClosed = false; - private final Lock mGroupLock = new ReentrantLock(); - @GuardedBy("mGroupLock") - private boolean mServerClosing = false; - private volatile long mLastAppliedCommitIndex = -1; // The last special "primary start" sequence number applied to this state machine. These special // sequence numbers are identified by being negative. @@ -121,7 +112,7 @@ public class JournalStateMachine extends BaseStateMachine { private volatile boolean mSnapshotting = false; private volatile boolean mIsLeader = false; - private final ExecutorService mJournalPool; + private final ExecutorService mJournalPool = Executors.newCachedThreadPool(); /** * This callback is used for interrupting someone who suspends the journal applier to work on @@ -142,31 +133,34 @@ public class JournalStateMachine extends BaseStateMachine { // The last index of the latest journal snapshot // created by this master or downloaded from other masters private volatile long mSnapshotLastIndex = -1; + private long mLastSnapshotTime = -1; + @SuppressFBWarnings(value = "IS2_INCONSISTENT_SYNC", + justification = "Written in synchronized block, read by metrics") + private long mLastSnapshotDurationMs = -1; + @SuppressFBWarnings(value = "IS2_INCONSISTENT_SYNC", + justification = "Written in synchronized block, read by metrics") + private long mLastSnapshotEntriesCount = -1; + private long mLastSnapshotReplayDurationMs = -1; + private long mLastSnapshotReplayEntriesCount = -1; /** Used to control applying to masters. 
*/ private BufferedJournalApplier mJournalApplier; - private final SimpleStateMachineStorage mStorage = new SimpleStateMachineStorage(); - private RaftGroupId mRaftGroupId; - private RaftServer mServer; - private long mLastCheckPointTime = -1; /** * @param journals master journals; these journals are still owned by the caller, not by the * journal state machine * @param journalSystem the raft journal system + * @param storage the {@link SnapshotDirStateMachineStorage} that this state machine will use */ - public JournalStateMachine(Map journals, RaftJournalSystem journalSystem) { - int maxConcurrencyPoolSize = - Configuration.getInt(PropertyKey.MASTER_JOURNAL_LOG_CONCURRENCY_MAX); - mJournalPool = new ForkJoinPool(maxConcurrencyPoolSize); - LOG.info("Ihe max concurrency for notifyTermIndexUpdated is loading with max threads {}", - maxConcurrencyPoolSize); + public JournalStateMachine(Map journals, RaftJournalSystem journalSystem, + SnapshotDirStateMachineStorage storage) { mJournals = journals; mJournalApplier = new BufferedJournalApplier(journals, () -> journalSystem.getJournalSinks(null)); resetState(); LOG.info("Initialized new journal state machine"); mJournalSystem = journalSystem; - mSnapshotManager = new SnapshotReplicationManager(journalSystem, mStorage); + mStorage = storage; + mSnapshotManager = new RaftSnapshotManager(mStorage, mJournalPool); MetricsSystem.registerGaugeIfAbsent( MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_LAST_INDEX.getName(), @@ -176,7 +170,7 @@ public JournalStateMachine(Map journals, RaftJournalSystem () -> getLastAppliedTermIndex().getIndex() - mSnapshotLastIndex); MetricsSystem.registerGaugeIfAbsent( MetricKey.MASTER_JOURNAL_LAST_CHECKPOINT_TIME.getName(), - () -> mLastCheckPointTime); + () -> mLastSnapshotTime); MetricsSystem.registerGaugeIfAbsent( MetricKey.MASTER_JOURNAL_LAST_APPLIED_COMMIT_INDEX.getName(), () -> mLastAppliedCommitIndex); @@ -184,9 +178,21 @@ public JournalStateMachine(Map journals, RaftJournalSystem MetricKey.MASTER_JOURNAL_CHECKPOINT_WARN.getName(), () -> getLastAppliedTermIndex().getIndex() - mSnapshotLastIndex > Configuration.getInt(PropertyKey.MASTER_JOURNAL_CHECKPOINT_PERIOD_ENTRIES) - && System.currentTimeMillis() - mLastCheckPointTime > Configuration.getMs( + && System.currentTimeMillis() - mLastSnapshotTime > Configuration.getMs( PropertyKey.MASTER_WEB_JOURNAL_CHECKPOINT_WARNING_THRESHOLD_TIME) ); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DURATION_MS.getName(), + () -> mLastSnapshotDurationMs); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_ENTRIES_COUNT.getName(), + () -> mLastSnapshotEntriesCount); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_REPLAY_DURATION_MS.getName(), + () -> mLastSnapshotReplayDurationMs); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_REPLAY_ENTRIES_COUNT.getName(), + () -> mLastSnapshotReplayEntriesCount); } @Override @@ -194,13 +200,8 @@ public void initialize(RaftServer server, RaftGroupId groupId, RaftStorage raftStorage) throws IOException { getLifeCycle().startAndTransition(() -> { super.initialize(server, groupId, raftStorage); - mServer = server; - mRaftGroupId = groupId; mStorage.init(raftStorage); - loadSnapshot(mStorage.getLatestSnapshot()); - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } + loadSnapshot(getLatestSnapshot()); }); } @@ -208,58 +209,27 @@ public void initialize(RaftServer server, 
RaftGroupId groupId, public void reinitialize() throws IOException { LOG.info("Reinitializing state machine."); mStorage.loadLatestSnapshot(); - loadSnapshot(mStorage.getLatestSnapshot()); + loadSnapshot(getLatestSnapshot()); unpause(); - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } } - private synchronized void loadSnapshot(SingleFileSnapshotInfo snapshot) throws IOException { + private synchronized void loadSnapshot(SnapshotInfo snapshot) throws IOException { if (snapshot == null) { LOG.info("No snapshot to load"); return; } - LOG.info("Loading Snapshot {}", snapshot); - final File snapshotFile = snapshot.getFile().getPath().toFile(); - if (!snapshotFile.exists()) { - throw new FileNotFoundException( - String.format("The snapshot file %s does not exist", snapshotFile.getPath())); - } try { resetState(); setLastAppliedTermIndex(snapshot.getTermIndex()); - install(snapshotFile); + LOG.debug("Loading snapshot {}", snapshot); + install(snapshot); + LOG.debug("Finished loading snapshot {}", snapshot); mSnapshotLastIndex = getLatestSnapshot() != null ? getLatestSnapshot().getIndex() : -1; - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } } catch (Exception e) { throw new IOException(String.format("Failed to load snapshot %s", snapshot), e); } } - /** - * Called by {@link RaftJournalSystem} stop internal method before - * shutting down the raft server to prevent a deadlock on - * the lock in RaftServerProxy. - */ - protected void setServerClosing() { - try (LockResource ignored = new LockResource(mGroupLock)) { - mServerClosing = true; - } - } - - /** - * Called by {@link RaftJournalSystem} stop internal method after - * shutting down the raft server. - */ - protected void afterServerClosing() { - try (LockResource ignored = new LockResource(mGroupLock)) { - mServerClosing = false; - } - } - /** * Allows leader to take snapshots. This is used exclusively for the * `bin/alluxio fsadmin journal checkpoint` command. @@ -278,48 +248,32 @@ public long takeSnapshot() { long index; StateLockManager stateLockManager = mStateLockManagerRef.get(); if (!mIsLeader) { + LOG.info("Taking local snapshot as follower"); index = takeLocalSnapshot(false); } else if (stateLockManager != null) { // the leader has been allowed to take a local snapshot by being given a non-null // StateLockManager through the #allowLeaderSnapshots method try (LockResource stateLock = stateLockManager.lockExclusive(StateLockOptions.defaults())) { + LOG.info("Taking local snapshot as leader"); index = takeLocalSnapshot(true); } catch (Exception e) { return RaftLog.INVALID_LOG_INDEX; } } else { - RaftGroup group; - try (LockResource ignored = new LockResource(mGroupLock)) { - if (mServerClosing) { - return RaftLog.INVALID_LOG_INDEX; - } - // These calls are protected by mGroupLock and mServerClosing - // as they will access the lock in RaftServerProxy.java - // which is also accessed during raft server shutdown which - // can cause a deadlock as the shutdown takes the lock while - // waiting for this thread to finish - Preconditions.checkState(mServer.getGroups().iterator().hasNext()); - group = mServer.getGroups().iterator().next(); - } catch (IOException e) { - SAMPLING_LOG.warn("Failed to get raft group info: {}", e.getMessage()); - return RaftLog.INVALID_LOG_INDEX; - } - if (group.getPeers().size() < 2) { - SAMPLING_LOG.warn("No follower to perform delegated snapshot. 
Please add more masters to " + "the quorum or manually take snapshot using 'alluxio fsadmin journal checkpoint'"); - return RaftLog.INVALID_LOG_INDEX; - } else { - index = mSnapshotManager.maybeCopySnapshotFromFollower(); - } + index = mSnapshotManager.downloadSnapshotFromOtherMasters(); } // update metrics if took a snapshot if (index != RaftLog.INVALID_LOG_INDEX) { mSnapshotLastIndex = index; - mLastCheckPointTime = System.currentTimeMillis(); + mLastSnapshotTime = System.currentTimeMillis(); + LOG.info("Took snapshot up to index {} at time {}", mSnapshotLastIndex, DateTime.now()); } return index; } + /** + * @return the latest snapshot information, or null if no snapshot exists + */ @Override public SnapshotInfo getLatestSnapshot() { return mStorage.getLatestSnapshot(); @@ -337,14 +291,6 @@ public CompletableFuture<Message> query(Message request) { JournalQueryRequest queryRequest = JournalQueryRequest.parseFrom( request.getContent().asReadOnlyByteBuffer()); LOG.debug("Received query request: {}", queryRequest); - // give snapshot manager a chance to handle snapshot related requests - Message reply = mSnapshotManager.handleRequest(queryRequest); - if (reply != null) { - future.complete(reply); - return future; - } - // Snapshot manager returned null indicating the request is not handled. Check and handle - // other type of requests. if (queryRequest.hasAddQuorumServerRequest()) { AddQuorumServerRequest addRequest = queryRequest.getAddQuorumServerRequest(); return CompletableFuture.supplyAsync(() -> { @@ -355,21 +301,25 @@ public CompletableFuture<Message> query(Message request) { } return Message.EMPTY; }); + } else { + return super.query(request); } } catch (Exception e) { LOG.error("Failed processing request {}", request, e); future.completeExceptionally(e); return future; } - return super.query(request); } @Override public void close() { mClosed = true; - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } + MetricsSystem.removeMetrics(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_LAST_INDEX.getName()); + MetricsSystem.removeMetrics(MetricKey.MASTER_JOURNAL_ENTRIES_SINCE_CHECKPOINT.getName()); + MetricsSystem.removeMetrics(MetricKey.MASTER_JOURNAL_LAST_CHECKPOINT_TIME.getName()); + MetricsSystem.removeMetrics(MetricKey.MASTER_JOURNAL_LAST_APPLIED_COMMIT_INDEX.getName()); + MetricsSystem.removeMetrics(MetricKey.MASTER_JOURNAL_CHECKPOINT_WARN.getName()); + mSnapshotManager.close(); } @Override @@ -390,9 +340,6 @@ public CompletableFuture<Message> applyTransaction(TransactionContext trx) { public void notifyNotLeader(Collection<TransactionContext> pendingEntries) { mIsLeader = false; mJournalSystem.notifyLeadershipStateChanged(false); - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } } @Override @@ -401,38 +348,22 @@ public void notifyConfigurationChanged(long term, long index, CompletableFuture.runAsync(mJournalSystem::updateGroup, mJournalPool); } - private long getNextIndex() { - try { - return mServer.getDivision(mRaftGroupId).getRaftLog().getNextIndex(); - } catch (IOException e) { - throw new IllegalStateException("Cannot obtain raft log index", e); - } - } - @Override public CompletableFuture<TermIndex> notifyInstallSnapshotFromLeader( RaftProtos.RoleInfoProto roleInfoProto, TermIndex firstTermIndexInLog) { - if (roleInfoProto.getRole() != RaftProtos.RaftPeerRole.FOLLOWER) { - return RaftJournalUtils.completeExceptionally( - new IllegalStateException(String.format( - "Server should be a follower when installing a snapshot from leader.
Actual: %s", - roleInfoProto.getRole()))); - } - return mSnapshotManager.installSnapshotFromLeader().thenApply(snapshotIndex -> { - long latestJournalIndex = getNextIndex() - 1; - if (latestJournalIndex >= snapshotIndex.getIndex()) { - // do not reload the state machine if the downloaded snapshot is older than the latest entry - // fail the request after installation so the leader will stop sending the same request - throw new IllegalArgumentException( - String.format("Downloaded snapshot index %d is older than the latest entry index %d", - snapshotIndex.getIndex(), latestJournalIndex)); - } - mSnapshotLastIndex = snapshotIndex.getIndex(); - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); + // this method is called automatically by Ratis when the leader does not have all the logs to + // give to this follower. This method instructs the follower to download a snapshot from + // other masters to become up-to-date. + LOG.info("Received instruction to install snapshot from other master asynchronously"); + return CompletableFuture.supplyAsync(() -> { + mSnapshotManager.downloadSnapshotFromOtherMasters(); + long index = mSnapshotManager.waitForAttemptToComplete(); + if (index == RaftLog.INVALID_LOG_INDEX) { + LOG.info("Failed to install snapshot from other master asynchronously"); + return null; } - return snapshotIndex; - }); + return getLatestSnapshot().getTermIndex(); + }, mJournalPool); } @Override @@ -566,9 +497,10 @@ public synchronized long takeLocalSnapshot(boolean hasStateLock) { SAMPLING_LOG.info("Skip taking snapshot because state machine is closed."); return RaftLog.INVALID_LOG_INDEX; } - if (mServer.getLifeCycleState() != LifeCycle.State.RUNNING) { + RaftServer server = getServer().join(); // gets completed during initialization + if (server.getLifeCycleState() != LifeCycle.State.RUNNING) { SAMPLING_LOG.info("Skip taking snapshot because raft server is not in running state: " - + "current state is {}.", mServer.getLifeCycleState()); + + "current state is {}.", server.getLifeCycleState()); return RaftLog.INVALID_LOG_INDEX; } if (mJournalApplier.isSuspended()) { @@ -585,66 +517,37 @@ public synchronized long takeLocalSnapshot(boolean hasStateLock) { LOG.debug("Calling snapshot"); Preconditions.checkState(!mSnapshotting, "Cannot call snapshot multiple times concurrently"); mSnapshotting = true; - try (Timer.Context ctx = MetricsSystem - .timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_GENERATE_TIMER.getName()).time()) { - // The start time of the most recent snapshot - long lastSnapshotStartTime = System.currentTimeMillis(); + TermIndex last = getLastAppliedTermIndex(); + + File snapshotDir = getSnapshotDir(last.getTerm(), last.getIndex()); + if (!snapshotDir.isDirectory() && !snapshotDir.mkdir()) { + return RaftLog.INVALID_LOG_INDEX; + } + try (Timer.Context ctx = MetricsSystem.timer( + MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_GENERATE_TIMER.getName()).time()) { + Instant start = Instant.now(); long snapshotId = mNextSequenceNumberToRead - 1; - TermIndex last = getLastAppliedTermIndex(); - File tempFile; - try { - tempFile = RaftJournalUtils.createTempSnapshotFile(mStorage); - } catch (IOException e) { - LogUtils.warnWithException(LOG, "Failed to create temp snapshot file", e); - return RaftLog.INVALID_LOG_INDEX; - } - LOG.info("Taking a snapshot to file {}", tempFile); - final File snapshotFile = mStorage.getSnapshotFile(last.getTerm(), last.getIndex()); - try (DataOutputStream outputStream = new DataOutputStream(new FileOutputStream(tempFile))) { - 
outputStream.writeLong(snapshotId); - JournalUtils.writeToCheckpoint(outputStream, getStateMachines()); - } catch (Exception e) { - tempFile.delete(); - LogUtils.warnWithException(LOG, - "Failed to write snapshot {} to file {}", snapshotId, tempFile, e); - return RaftLog.INVALID_LOG_INDEX; - } - try { - final MD5Hash digest = MD5FileUtil.computeMd5ForFile(tempFile); - LOG.info("Saving digest for snapshot file {}", snapshotFile); - MD5FileUtil.saveMD5File(snapshotFile, digest); - LOG.info("Renaming a snapshot file {} to {}", tempFile, snapshotFile); - if (!tempFile.renameTo(snapshotFile)) { - tempFile.delete(); - LOG.warn("Failed to rename snapshot from {} to {}", tempFile, snapshotFile); - return RaftLog.INVALID_LOG_INDEX; - } - LOG.info("Completed snapshot with size {} up to SN {} in {}ms", - FormatUtils.getSizeFromBytes(snapshotFile.length()), - snapshotId, System.currentTimeMillis() - lastSnapshotStartTime); - } catch (Exception e) { - tempFile.delete(); - LogUtils.warnWithException(LOG, - "Failed to complete snapshot: {} - {}", snapshotId, snapshotFile, e); - return RaftLog.INVALID_LOG_INDEX; - } - try { - mStorage.loadLatestSnapshot(); - } catch (Exception e) { - snapshotFile.delete(); - LogUtils.warnWithException(LOG, "Failed to refresh latest snapshot: {}", snapshotId, e); - return RaftLog.INVALID_LOG_INDEX; - } + SingleEntryJournaled idWriter = new SnapshotIdJournaled(); + idWriter.processJournalEntry(JournalEntry.newBuilder().setSequenceNumber(snapshotId).build()); + CompletableFuture.allOf(Stream.concat(Stream.of(idWriter), getStateMachines().stream()) + .map(journaled -> journaled.writeToCheckpoint(snapshotDir, mJournalPool)) + .toArray(CompletableFuture[]::new)) + .join(); + mStorage.loadLatestSnapshot(); + mStorage.signalNewSnapshot(); + + mLastSnapshotDurationMs = Duration.between(start, Instant.now()).toMillis(); + mLastSnapshotEntriesCount = mNextSequenceNumberToRead; return last.getIndex(); + } catch (Exception e) { + LOG.error("error taking snapshot", e); + return RaftLog.INVALID_LOG_INDEX; } finally { mSnapshotting = false; - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } } } - private void install(File snapshotFile) { + private void install(SnapshotInfo snapshot) { if (mClosed) { return; } @@ -653,14 +556,29 @@ private void install(File snapshotFile) { return; } + File snapshotDir = getSnapshotDir(snapshot.getTerm(), snapshot.getIndex()); long snapshotId = 0L; try (Timer.Context ctx = MetricsSystem.timer(MetricKey - .MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLAY_TIMER.getName()).time(); - DataInputStream stream = new DataInputStream(new FileInputStream(snapshotFile))) { - snapshotId = stream.readLong(); - JournalUtils.restoreFromCheckpoint(new CheckpointInputStream(stream), getStateMachines()); + .MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLAY_TIMER.getName()).time()) { + Instant start = Instant.now(); + if (snapshotDir.isFile()) { + LOG.info("Restoring from snapshot {} in old format", snapshot.getTermIndex()); + try (DataInputStream stream = new DataInputStream(new FileInputStream(snapshotDir))) { + snapshotId = stream.readLong(); + JournalUtils.restoreFromCheckpoint(new CheckpointInputStream(stream), getStateMachines()); + } + } else { + SingleEntryJournaled idReader = new SnapshotIdJournaled(); + CompletableFuture.allOf(Stream.concat(Stream.of(idReader), getStateMachines().stream()) + .map(journaled -> journaled.restoreFromCheckpoint(snapshotDir, mJournalPool)) + .toArray(CompletableFuture[]::new)) + .join(); + snapshotId = 
idReader.getEntry().getSequenceNumber(); + } + mLastSnapshotReplayDurationMs = Duration.between(start, Instant.now()).toMillis(); } catch (Exception e) { - JournalUtils.handleJournalReplayFailure(LOG, e, "Failed to install snapshot: %s", snapshotId); + JournalUtils.handleJournalReplayFailure(LOG, e, "Failed to install snapshot: %s", + snapshot.getTermIndex()); if (Configuration.getBoolean(PropertyKey.MASTER_JOURNAL_TOLERATE_CORRUPTION)) { return; } @@ -671,9 +589,15 @@ private void install(File snapshotFile) { mNextSequenceNumberToRead); } mNextSequenceNumberToRead = snapshotId + 1; + mLastSnapshotReplayEntriesCount = mNextSequenceNumberToRead; LOG.info("Successfully installed snapshot up to SN {}", snapshotId); } + private File getSnapshotDir(long term, long index) { + String dirName = SimpleStateMachineStorage.getSnapshotFileName(term, index); + return new File(mStorage.getSnapshotDir(), dirName); + } + /** * Suspends applying to masters. * @@ -790,22 +714,15 @@ public synchronized boolean isSnapshotting() { @Override public void notifyLeaderChanged(RaftGroupMemberId groupMemberId, RaftPeerId raftPeerId) { - if (mRaftGroupId == groupMemberId.getGroupId()) { + if (getGroupId() == groupMemberId.getGroupId()) { mIsLeader = groupMemberId.getPeerId() == raftPeerId; mJournalSystem.notifyLeadershipStateChanged(mIsLeader); } else { LOG.warn("Received notification for unrecognized group {}, current group is {}", - groupMemberId.getGroupId(), mRaftGroupId); + groupMemberId.getGroupId(), getGroupId()); } } - /** - * @return the snapshot replication manager - */ - public SnapshotReplicationManager getSnapshotReplicationManager() { - return mSnapshotManager; - } - /** * @return whether the journal is suspended */ diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceClient.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceClient.java index 4531c847217f..9bbd13273980 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceClient.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceClient.java @@ -12,28 +12,41 @@ package alluxio.master.journal.raft; import alluxio.AbstractMasterClient; +import alluxio.ClientContext; import alluxio.Constants; -import alluxio.grpc.DownloadSnapshotPRequest; -import alluxio.grpc.DownloadSnapshotPResponse; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.LatestSnapshotInfoPRequest; import alluxio.grpc.RaftJournalServiceGrpc; import alluxio.grpc.ServiceType; -import alluxio.grpc.UploadSnapshotPRequest; -import alluxio.grpc.UploadSnapshotPResponse; +import alluxio.grpc.SnapshotData; +import alluxio.grpc.SnapshotMetadata; import alluxio.master.MasterClientContext; +import alluxio.master.selectionpolicy.MasterSelectionPolicy; +import alluxio.retry.RetryPolicy; -import io.grpc.stub.StreamObserver; +import java.util.Iterator; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; /** * A client for raft journal service. */ public class RaftJournalServiceClient extends AbstractMasterClient { - private RaftJournalServiceGrpc.RaftJournalServiceStub mClient = null; + private final long mRequestInfoTimeoutMs = + Configuration.getMs(PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT); + + private RaftJournalServiceGrpc.RaftJournalServiceBlockingStub mBlockingClient = null; /** - * @param clientContext master client context + * Create a client that talks to a specific master. 
+ * @param selectionPolicy specifies which master is targeted + * @param retryPolicySupplier the retry policy to use when connecting to another master */ - public RaftJournalServiceClient(MasterClientContext clientContext) { - super(clientContext); + public RaftJournalServiceClient(MasterSelectionPolicy selectionPolicy, + Supplier<RetryPolicy> retryPolicySupplier) { + super(MasterClientContext.newBuilder(ClientContext.create(Configuration.global())).build(), + selectionPolicy, retryPolicySupplier); } @Override @@ -51,28 +64,33 @@ protected long getServiceVersion() { return Constants.RAFT_JOURNAL_SERVICE_VERSION; } + @Override + protected void beforeConnect() { + // the default behavior of this method is to search for the primary master + // in our case we do not care which one is the primary master as MasterSelectionPolicy is + // explicitly specified + } + @Override protected void afterConnect() { - mClient = RaftJournalServiceGrpc.newStub(mChannel); + mBlockingClient = RaftJournalServiceGrpc.newBlockingStub(mChannel); } /** - * Uploads a snapshot. - * @param responseObserver the response stream observer - * @return the request stream observer + * @return {@link SnapshotMetadata} from the specified master */ - public StreamObserver uploadSnapshot( - StreamObserver responseObserver) { - return mClient.uploadSnapshot(responseObserver); + public SnapshotMetadata requestLatestSnapshotInfo() { + return mBlockingClient.withDeadlineAfter(mRequestInfoTimeoutMs, TimeUnit.MILLISECONDS) + .requestLatestSnapshotInfo(LatestSnapshotInfoPRequest.getDefaultInstance()); } /** - * Downloads a snapshot. - * @param responseObserver the response stream observer - * @return the request stream observer + * Receives snapshot data from the specified follower. + * + * @param request the request detailing which file to download + * @return an iterator containing the snapshot data */ - public StreamObserver downloadSnapshot( - StreamObserver responseObserver) { - return mClient.downloadSnapshot(responseObserver); + public Iterator<SnapshotData> requestLatestSnapshotData(SnapshotMetadata request) { + return mBlockingClient.requestLatestSnapshotData(request); } } diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java index 9bc203a87ed5..8bba26f74dea 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java @@ -11,37 +11,173 @@ package alluxio.master.journal.raft; -import alluxio.grpc.DownloadSnapshotPRequest; -import alluxio.grpc.DownloadSnapshotPResponse; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.LatestSnapshotInfoPRequest; import alluxio.grpc.RaftJournalServiceGrpc; -import alluxio.grpc.UploadSnapshotPRequest; -import alluxio.grpc.UploadSnapshotPResponse; +import alluxio.grpc.SnapshotData; +import alluxio.grpc.SnapshotMetadata; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.util.compression.DirectoryMarshaller; import com.google.protobuf.ByteString; +import com.google.protobuf.UnsafeByteOperations; +import io.grpc.Context; +import io.grpc.Status; import io.grpc.stub.StreamObserver; +import org.apache.ratis.server.protocol.TermIndex; +import org.apache.ratis.statemachine.SnapshotInfo; +import org.apache.ratis.statemachine.StateMachineStorage;
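On the calling side, the blocking stub's per-call deadline surfaces as a StatusRuntimeException. A hedged sketch of a call site (the client wiring and the loop body are assumptions, not taken from this diff):

// Sketch: query snapshot info under the deadline configured above, then
// stream the snapshot data if one exists.
try {
  SnapshotMetadata info = client.requestLatestSnapshotInfo();
  if (info.getExists()) {
    Iterator<SnapshotData> chunks = client.requestLatestSnapshotData(info);
    while (chunks.hasNext()) {
      SnapshotData chunk = chunks.next();
      // ... write chunk bytes to the local snapshot directory ...
    }
  }
} catch (io.grpc.StatusRuntimeException e) {
  // A DEADLINE_EXCEEDED status code here means the info request outlived
  // the configured journal request-info timeout.
}

+import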
diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java
index 9bc203a87ed5..8bba26f74dea 100644
--- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java
+++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java
@@ -11,37 +11,173 @@

 package alluxio.master.journal.raft;

-import alluxio.grpc.DownloadSnapshotPRequest;
-import alluxio.grpc.DownloadSnapshotPResponse;
+import alluxio.conf.Configuration;
+import alluxio.conf.PropertyKey;
+import alluxio.grpc.LatestSnapshotInfoPRequest;
 import alluxio.grpc.RaftJournalServiceGrpc;
-import alluxio.grpc.UploadSnapshotPRequest;
-import alluxio.grpc.UploadSnapshotPResponse;
+import alluxio.grpc.SnapshotData;
+import alluxio.grpc.SnapshotMetadata;
+import alluxio.metrics.MetricKey;
+import alluxio.metrics.MetricsSystem;
+import alluxio.util.compression.DirectoryMarshaller;
+
+import com.google.protobuf.ByteString;
+import com.google.protobuf.UnsafeByteOperations;
+import io.grpc.Context;
+import io.grpc.Status;
 import io.grpc.stub.StreamObserver;
+import org.apache.ratis.server.protocol.TermIndex;
+import org.apache.ratis.statemachine.SnapshotInfo;
+import org.apache.ratis.statemachine.StateMachineStorage;
+import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.OutputStream;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.concurrent.TimeUnit;

 /**
  * RPC handler for raft journal service.
  */
 public class RaftJournalServiceHandler extends RaftJournalServiceGrpc.RaftJournalServiceImplBase {
+  private static final Logger LOG = LoggerFactory.getLogger(RaftJournalServiceHandler.class);
-  private final SnapshotReplicationManager mManager;
+  private final StateMachineStorage mStateMachineStorage;
+  private volatile long mLastSnapshotUploadDurationMs = -1;
+  private volatile long mLastSnapshotUploadSize = -1;
+  private volatile long mLastSnapshotUploadDiskSize = -1;

   /**
-   * @param manager the snapshot replication manager
+   * @param storage the storage that the state machine uses for its snapshots
    */
-  public RaftJournalServiceHandler(SnapshotReplicationManager manager) {
-    mManager = manager;
+  public RaftJournalServiceHandler(StateMachineStorage storage) {
+    mStateMachineStorage = storage;
+
+    MetricsSystem.registerGaugeIfAbsent(
+        MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_UPLOAD_DURATION_MS.getName(),
+        () -> mLastSnapshotUploadDurationMs);
+    MetricsSystem.registerGaugeIfAbsent(
+        MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_UPLOAD_SIZE.getName(),
+        () -> mLastSnapshotUploadSize);
+    MetricsSystem.registerGaugeIfAbsent(
+        MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_UPLOAD_DISK_SIZE.getName(),
+        () -> mLastSnapshotUploadDiskSize);
   }

   @Override
-  public StreamObserver<UploadSnapshotPRequest> uploadSnapshot(
-      StreamObserver<UploadSnapshotPResponse> responseObserver) {
-    return mManager.receiveSnapshotFromFollower(responseObserver);
+  public void requestLatestSnapshotInfo(LatestSnapshotInfoPRequest request,
+      StreamObserver<SnapshotMetadata> responseObserver) {
+    LOG.info("Received request for latest snapshot info");
+    if (Context.current().isCancelled()) {
+      responseObserver.onError(
+          Status.CANCELLED.withDescription("Cancelled by client").asRuntimeException());
+      return;
+    }
+    SnapshotInfo snapshot = mStateMachineStorage.getLatestSnapshot();
+    SnapshotMetadata.Builder metadata = SnapshotMetadata.newBuilder();
+    if (snapshot == null) {
+      LOG.info("No snapshot to send");
+      metadata.setExists(false);
+    } else {
+      LOG.info("Found snapshot {}", snapshot.getTermIndex());
+      metadata.setExists(true)
+          .setSnapshotTerm(snapshot.getTerm())
+          .setSnapshotIndex(snapshot.getIndex());
+    }
+    responseObserver.onNext(metadata.build());
+    responseObserver.onCompleted();
   }

   @Override
-  public StreamObserver<DownloadSnapshotPRequest> downloadSnapshot(
-      StreamObserver<DownloadSnapshotPResponse> responseObserver) {
-    return mManager.sendSnapshotToFollower(responseObserver);
+  public void requestLatestSnapshotData(SnapshotMetadata request,
+      StreamObserver<SnapshotData> responseObserver) {
+    TermIndex index = TermIndex.valueOf(request.getSnapshotTerm(), request.getSnapshotIndex());
+    LOG.info("Received request for snapshot data {}", index);
+    if (Context.current().isCancelled()) {
+      responseObserver.onError(
+          Status.CANCELLED.withDescription("Cancelled by client").asRuntimeException());
+      return;
+    }
+
+    String snapshotDirName = SimpleStateMachineStorage
+        .getSnapshotFileName(request.getSnapshotTerm(), request.getSnapshotIndex());
+    Path snapshotPath = new File(mStateMachineStorage.getSnapshotDir(), snapshotDirName).toPath();
+    long totalBytesSent;
+    long diskSize;
+    LOG.info("Begin snapshot upload of {}", index);
+    Instant start = Instant.now();
+    try (SnapshotGrpcOutputStream stream = new SnapshotGrpcOutputStream(responseObserver)) {
+      DirectoryMarshaller marshaller = DirectoryMarshaller.Factory.create();
+      diskSize = marshaller.write(snapshotPath, stream);
+      totalBytesSent = stream.totalBytes();
+    } catch (Exception e) {
+      LOG.warn("Failed to upload snapshot {}", index, e);
+      responseObserver.onError(Status.INTERNAL.withCause(e).asRuntimeException());
+      return;
+    }
+    responseObserver.onCompleted();
+    // update last duration and duration timer metrics
+    mLastSnapshotUploadDurationMs = Duration.between(start, Instant.now()).toMillis();
+    MetricsSystem.timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_UPLOAD_TIMER.getName())
+        .update(mLastSnapshotUploadDurationMs, TimeUnit.MILLISECONDS);
+    LOG.info("Total milliseconds to upload {}: {}", index, mLastSnapshotUploadDurationMs);
+    // update uncompressed snapshot size metric
+    mLastSnapshotUploadDiskSize = diskSize;
+    MetricsSystem.histogram(
+        MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_UPLOAD_DISK_HISTOGRAM.getName())
+        .update(mLastSnapshotUploadDiskSize);
+    LOG.info("Total snapshot uncompressed bytes for {}: {}", index, mLastSnapshotUploadDiskSize);
+    // update compressed snapshot size (aka size sent over the network)
+    mLastSnapshotUploadSize = totalBytesSent;
+    MetricsSystem.histogram(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_UPLOAD_HISTOGRAM.getName())
+        .update(mLastSnapshotUploadSize);
+    LOG.info("Total bytes sent for {}: {}", index, mLastSnapshotUploadSize);
+    LOG.info("Uploaded snapshot {}", index);
+  }
+
+  static class SnapshotGrpcOutputStream extends OutputStream {
+    private final int mSnapshotReplicationChunkSize = (int) Configuration.getBytes(
+        PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_CHUNK_SIZE);
+    private final StreamObserver<SnapshotData> mObserver;
+    private long mTotalBytesSent = 0;
+    private byte[] mBuffer = new byte[mSnapshotReplicationChunkSize];
+    private int mBufferPosition = 0;
+
+    public SnapshotGrpcOutputStream(StreamObserver<SnapshotData> responseObserver) {
+      mObserver = responseObserver;
+    }
+
+    @Override
+    public void write(int b) {
+      mBuffer[mBufferPosition++] = (byte) b;
+      if (mBufferPosition == mBuffer.length) {
+        flushBuffer();
+      }
+    }
+
+    @Override
+    public void close() {
+      if (mBufferPosition > 0) {
+        flushBuffer();
+      }
+    }
+
+    private void flushBuffer() {
+      // wrapping the buffer avoids an array copy; a fresh buffer is allocated instead of
+      // reusing this one, since the wrapped bytes are still referenced by the message
+      ByteString bytes = UnsafeByteOperations.unsafeWrap(mBuffer, 0, mBufferPosition);
+      mBuffer = new byte[mSnapshotReplicationChunkSize];
+      LOG.debug("Sending chunk of size {}: {}", mBufferPosition, bytes);
+      mObserver.onNext(SnapshotData.newBuilder().setChunk(bytes).build());
+      mTotalBytesSent += mBufferPosition;
+      mBufferPosition = 0;
+    }
+
+    public long totalBytes() {
+      return mTotalBytesSent + mBufferPosition;
+    }
+  }
 }
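A small sketch exercising the chunking behavior of SnapshotGrpcOutputStream in isolation, with a counting StreamObserver standing in for a real gRPC stream. The demo class name is hypothetical, and default Alluxio configuration is assumed so the chunk-size property resolves.

package alluxio.master.journal.raft;

import alluxio.grpc.SnapshotData;

import io.grpc.stub.StreamObserver;

public final class ChunkCountingDemo {
  public static void main(String[] args) throws Exception {
    final long[] chunks = {0};
    // stand-in for the real gRPC response stream: just count messages
    StreamObserver<SnapshotData> counting = new StreamObserver<SnapshotData>() {
      @Override public void onNext(SnapshotData data) { chunks[0]++; }
      @Override public void onError(Throwable t) { }
      @Override public void onCompleted() { }
    };
    try (RaftJournalServiceHandler.SnapshotGrpcOutputStream out =
        new RaftJournalServiceHandler.SnapshotGrpcOutputStream(counting)) {
      out.write(new byte[10 * 1024 * 1024]); // 10 MiB of zeros
    } // close() flushes the final partial chunk, if any
    System.out.println("chunks sent: " + chunks[0]);
  }
}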
diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java
index 999484b1279c..37ecc043c3cc 100644
--- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java
+++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java
@@ -164,7 +164,7 @@
  * so we allow a snapshot to be taken once a day at a user-configured time. To support this,
  * all state changes must first acquire a read lock, and snapshotting requires the
  * corresponding write lock. Once we have the write lock for all state machines, we enable
- * snapshots in Copycat through our AtomicBoolean, then wait for any snapshot to complete.
+ * snapshots in Ratis through our AtomicBoolean, then wait for any snapshot to complete.
  */
 @ThreadSafe
 public class RaftJournalSystem extends AbstractJournalSystem {
@@ -209,6 +209,12 @@ public class RaftJournalSystem extends AbstractJournalSystem {
    * and installing snapshots.
    */
   private JournalStateMachine mStateMachine;
+
+  /**
+   * Serves as the storage object for the above state machine.
+   */
+  private final SnapshotDirStateMachineStorage mStateMachineStorage =
+      new SnapshotDirStateMachineStorage();
   /**
    * Ratis server.
    */
@@ -294,7 +300,7 @@ private synchronized void initServer() throws IOException {
     if (mStateMachine != null) {
       mStateMachine.close();
     }
-    mStateMachine = new JournalStateMachine(mJournals, this);
+    mStateMachine = new JournalStateMachine(mJournals, this, mStateMachineStorage);

     RaftProperties properties = new RaftProperties();
     Parameters parameters = new Parameters();
@@ -373,7 +379,7 @@ private synchronized void initServer() throws IOException {
         TimeUnit.MILLISECONDS));

     // snapshot retention
-    RaftServerConfigKeys.Snapshot.setRetentionFileNum(properties, 3);
+    RaftServerConfigKeys.Snapshot.setRetentionFileNum(properties, 2);

     // unsafe flush
     RaftServerConfigKeys.Log.setUnsafeFlushEnabled(properties,
@@ -545,6 +551,7 @@ public synchronized void gainPrimacy() {
     mAsyncJournalWriter
         .set(new AsyncJournalWriter(mRaftJournalWriter, () -> getJournalSinks(null)));
     mTransferLeaderAllowed.set(true);
+    super.registerMetrics();
     LOG.info("Gained primacy.");
   }
@@ -655,7 +662,7 @@ public synchronized void checkpoint(StateLockManager stateLockManager) throws IO
   public synchronized Map<alluxio.grpc.ServiceType, GrpcService> getJournalServices() {
     Map<alluxio.grpc.ServiceType, GrpcService> services = new HashMap<>();
     services.put(alluxio.grpc.ServiceType.RAFT_JOURNAL_SERVICE, new GrpcService(
-        new RaftJournalServiceHandler(mStateMachine.getSnapshotReplicationManager())));
+        new RaftJournalServiceHandler(mStateMachineStorage)));
     return services;
   }
@@ -833,13 +840,10 @@ public synchronized void stopInternal() {
     if (mRaftJournalWriter != null) {
       mRaftJournalWriter.close();
     }
-    mStateMachine.setServerClosing();
     try {
       mServer.close();
     } catch (IOException e) {
       throw new RuntimeException("Failed to shut down Raft server", e);
-    } finally {
-      mStateMachine.afterServerClosing();
     }
     LOG.info("Journal shutdown complete");
   }
@@ -1191,6 +1195,11 @@ synchronized RaftServer getRaftServer() {
     return mServer;
   }
+
+  @VisibleForTesting
+  ConcurrentHashMap<String, RaftJournal> getJournals() {
+    return mJournals;
+  }
+
   /**
    * Updates raft group with the current values from raft server.
    */
@@ -1202,8 +1211,9 @@ public synchronized void updateGroup() {
     }
   }

+  @VisibleForTesting
   @Nullable
-  private RaftProtos.RoleInfoProto getRaftRoleInfo() {
+  RaftProtos.RoleInfoProto getRaftRoleInfo() {
     GroupInfoReply groupInfo = null;
     try {
       groupInfo = getGroupInfo();
diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftSnapshotManager.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftSnapshotManager.java
new file mode 100644
index 000000000000..7bd6980e99bb
--- /dev/null
+++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftSnapshotManager.java
@@ -0,0 +1,313 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License").
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal.raft; + +import alluxio.AbstractClient; +import alluxio.Constants; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.SnapshotData; +import alluxio.grpc.SnapshotMetadata; +import alluxio.master.selectionpolicy.MasterSelectionPolicy; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.retry.ExponentialBackoffRetry; +import alluxio.retry.RetryPolicy; +import alluxio.util.ConfigurationUtils; +import alluxio.util.compression.DirectoryMarshaller; +import alluxio.util.logging.SamplingLogger; +import alluxio.util.network.NetworkAddressUtils; + +import com.codahale.metrics.Timer; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.ratis.server.protocol.TermIndex; +import org.apache.ratis.server.raftlog.RaftLog; +import org.apache.ratis.statemachine.SnapshotInfo; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.InputStream; +import java.net.InetSocketAddress; +import java.nio.ByteBuffer; +import java.time.Duration; +import java.time.Instant; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import javax.annotation.Nullable; + +/** + * Manages a snapshot download. 
+ */
+public class RaftSnapshotManager implements AutoCloseable {
+  private static final Logger LOG = LoggerFactory.getLogger(RaftSnapshotManager.class);
+  private static final Logger SAMPLING_LOG = new SamplingLogger(LOG, 10L * Constants.SECOND_MS);
+
+  private final int mRequestInfoTimeout = (int)
+      Configuration.getMs(PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT);
+
+  private final SnapshotDirStateMachineStorage mStorage;
+  private final ExecutorService mExecutor;
+  private final Map<InetSocketAddress, RaftJournalServiceClient> mClients;
+
+  private volatile long mLastSnapshotDownloadDurationMs = -1;
+  private volatile long mLastSnapshotDownloadSize = -1;
+  private volatile long mLastSnapshotDownloadDiskSize = -1;
+
+  @Nullable
+  private CompletableFuture<Long> mDownloadFuture = null;
+
+  RaftSnapshotManager(SnapshotDirStateMachineStorage storage, ExecutorService executor) {
+    mStorage = storage;
+    mExecutor = executor;
+
+    InetSocketAddress localAddress = NetworkAddressUtils.getConnectAddress(
+        NetworkAddressUtils.ServiceType.MASTER_RPC, Configuration.global());
+    mClients = ConfigurationUtils.getMasterRpcAddresses(Configuration.global()).stream()
+        .filter(address -> !address.equals(localAddress))
+        .collect(Collectors.toMap(Function.identity(), address -> {
+          MasterSelectionPolicy selection = MasterSelectionPolicy.Factory.specifiedMaster(address);
+          int numTries = 10;
+          int sleep = Math.max(1, mRequestInfoTimeout / numTries);
+          // try to connect to the other master once per sleep interval, until the
+          // request info timeout has elapsed
+          Supplier<RetryPolicy> retry = () -> new ExponentialBackoffRetry(sleep, sleep, numTries);
+          return new RaftJournalServiceClient(selection, retry);
+        }));
+
+    MetricsSystem.registerGaugeIfAbsent(
+        MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DOWNLOAD_DURATION_MS.getName(),
+        () -> mLastSnapshotDownloadDurationMs);
+    MetricsSystem.registerGaugeIfAbsent(
+        MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DOWNLOAD_SIZE.getName(),
+        () -> mLastSnapshotDownloadSize);
+    MetricsSystem.registerGaugeIfAbsent(
+        MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DOWNLOAD_DISK_SIZE.getName(),
+        () -> mLastSnapshotDownloadDiskSize);
+  }
+
+  /**
+   * Waits synchronously for the download attempt to be complete.
+   * @return the result of the download attempt, or {@link RaftLog#INVALID_LOG_INDEX} if no
+   * attempt is underway
+   */
+  public long waitForAttemptToComplete() {
+    if (mDownloadFuture == null) {
+      return RaftLog.INVALID_LOG_INDEX;
+    }
+    mDownloadFuture.join();
+    // poll again to make sure that mDownloadFuture gets reset to null
+    return downloadSnapshotFromOtherMasters();
+  }
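A sketch of the polling contract seen from a caller's side (the helper class below is hypothetical): the first call only schedules the asynchronous download, and either a later poll or waitForAttemptToComplete() harvests the installed index.

package alluxio.master.journal.raft;

import org.apache.ratis.server.raftlog.RaftLog;

final class SnapshotDownloadPollingExample {
  /** Schedules a download if none is in flight, then blocks for its result. */
  static long downloadAndWait(RaftSnapshotManager manager) {
    long index = manager.downloadSnapshotFromOtherMasters(); // usually INVALID_LOG_INDEX here
    if (index == RaftLog.INVALID_LOG_INDEX) {
      // joins the in-flight future, then re-polls so the future is reset to null
      index = manager.waitForAttemptToComplete();
    }
    return index; // INVALID_LOG_INDEX if every candidate failed
  }
}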
+ */ + public long downloadSnapshotFromOtherMasters() { + if (mClients.isEmpty()) { + SAMPLING_LOG.warn("No followers are present to download a snapshot from"); + return RaftLog.INVALID_LOG_INDEX; + } + if (mDownloadFuture == null) { + mDownloadFuture = CompletableFuture.supplyAsync(this::core, mExecutor).exceptionally(err -> { + LOG.debug("Failed to download snapshot", err); + return RaftLog.INVALID_LOG_INDEX; + }); + } else if (mDownloadFuture.isDone()) { + LOG.debug("Download operation is done"); + Long snapshotIndex = mDownloadFuture.join(); + LOG.debug("Retrieved downloaded snapshot at index {}", snapshotIndex); + mDownloadFuture = null; + return snapshotIndex; + } + return RaftLog.INVALID_LOG_INDEX; + } + + private long core() { + SnapshotInfo localSnapshotInfo = mStorage.getLatestSnapshot(); + if (localSnapshotInfo == null) { + LOG.info("No local snapshot found"); + } else { + LOG.info("Local snapshot is {}", TermIndex.valueOf(localSnapshotInfo.getTerm(), + localSnapshotInfo.getIndex())); + } + // max heap based on TermIndex extracted from the SnapshotMetadata of each pair + PriorityQueue> otherInfos = + new PriorityQueue<>(Math.max(1, mClients.size()), + Collections.reverseOrder(Comparator.comparing(pair -> toTermIndex(pair.getLeft())))); + // wait mRequestInfoTimeout between each attempt to contact the masters + RetryPolicy retryPolicy = + new ExponentialBackoffRetry(mRequestInfoTimeout, mRequestInfoTimeout, 10); + while (otherInfos.isEmpty() && retryPolicy.attempt()) { + LOG.debug("Attempt to retrieve info"); + otherInfos.addAll(retrieveFollowerInfos(localSnapshotInfo)); + LOG.debug("Attempt to retrieve info over"); + } + + while (!otherInfos.isEmpty()) { + ImmutablePair info = otherInfos.poll(); + InetSocketAddress address = info.getRight(); + SnapshotMetadata snapshotMetadata = info.getLeft(); + long index; + if ((index = downloadSnapshotFromAddress(snapshotMetadata, address)) + != RaftLog.INVALID_LOG_INDEX) { + return index; + } + } + return RaftLog.INVALID_LOG_INDEX; + } + + /** + * @param localSnapshotInfo contains information about the most up-to-date snapshot on this master + * @return a sorted list of pairs containing a follower's address and its most up-to-date snapshot + */ + private List> retrieveFollowerInfos( + SnapshotInfo localSnapshotInfo) { + return mClients.keySet().parallelStream() + // map to a pair of (address, SnapshotMetadata) by requesting all followers in parallel + .map(address -> { + RaftJournalServiceClient client = mClients.get(address); + try { + client.connect(); + LOG.info("Receiving snapshot info from {}", address); + SnapshotMetadata metadata = client.requestLatestSnapshotInfo(); + if (!metadata.getExists()) { + LOG.info("No snapshot is present on {}", address); + } else { + LOG.info("Received snapshot info {} from {}", toTermIndex(metadata), address); + } + return ImmutablePair.of(metadata, address); + } catch (Exception e) { + client.disconnect(); + LOG.debug("Failed to retrieve snapshot info from {}", address, e); + return ImmutablePair.of(SnapshotMetadata.newBuilder().setExists(false).build(), + address); + } + }) + // filter out followers that do not have any snapshot or no updated snapshot + .filter(pair -> pair.getLeft().getExists() && (localSnapshotInfo == null + || localSnapshotInfo.getTermIndex().compareTo(toTermIndex(pair.getLeft())) < 0)) + .collect(Collectors.toList()); + } + + /** + * Retrieves snapshot from the specified address. 
+
+  /**
+   * Retrieves a snapshot from the specified address.
+   * @param snapshotMetadata helps identify which snapshot is desired
+   * @param address where to retrieve it from
+   * @return the index of the downloaded snapshot, or {@link RaftLog#INVALID_LOG_INDEX} if the
+   *         download failed
+   */
+  private long downloadSnapshotFromAddress(SnapshotMetadata snapshotMetadata,
+      InetSocketAddress address) {
+    TermIndex index = toTermIndex(snapshotMetadata);
+    LOG.info("Retrieving snapshot {} from {}", index, address);
+    Instant start = Instant.now();
+    RaftJournalServiceClient client = mClients.get(address);
+    try {
+      client.connect();
+      Iterator<SnapshotData> it = client.requestLatestSnapshotData(snapshotMetadata);
+      long totalBytesRead;
+      long snapshotDiskSize;
+      try (SnapshotGrpcInputStream stream = new SnapshotGrpcInputStream(it)) {
+        DirectoryMarshaller marshaller = DirectoryMarshaller.Factory.create();
+        snapshotDiskSize = marshaller.read(mStorage.getTmpDir().toPath(), stream);
+        totalBytesRead = stream.totalBytes();
+      }
+
+      File finalSnapshotDestination = new File(mStorage.getSnapshotDir(),
+          SimpleStateMachineStorage.getSnapshotFileName(snapshotMetadata.getSnapshotTerm(),
+              snapshotMetadata.getSnapshotIndex()));
+      FileUtils.moveDirectory(mStorage.getTmpDir(), finalSnapshotDestination);
+      // update last duration and duration timer metrics
+      mLastSnapshotDownloadDurationMs = Duration.between(start, Instant.now()).toMillis();
+      MetricsSystem.timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_TIMER.getName())
+          .update(mLastSnapshotDownloadDurationMs, TimeUnit.MILLISECONDS);
+      LOG.info("Total milliseconds to download {}: {}", index, mLastSnapshotDownloadDurationMs);
+      // update uncompressed snapshot size metric
+      mLastSnapshotDownloadDiskSize = snapshotDiskSize;
+      MetricsSystem.histogram(
+          MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_DISK_HISTOGRAM.getName())
+          .update(mLastSnapshotDownloadDiskSize);
+      LOG.info("Total extracted bytes of snapshot {}: {}", index, mLastSnapshotDownloadDiskSize);
+      // update compressed snapshot size (aka size sent over the network)
+      mLastSnapshotDownloadSize = totalBytesRead;
+      MetricsSystem.histogram(
+          MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_HISTOGRAM.getName())
+          .update(mLastSnapshotDownloadSize);
+      LOG.info("Total bytes read from {} for {}: {}", address, index, mLastSnapshotDownloadSize);
+      try (Timer.Context ctx = MetricsSystem.timer(
+          MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_INSTALL_TIMER.getName()).time()) {
+        mStorage.loadLatestSnapshot();
+        mStorage.signalNewSnapshot();
+      }
+      LOG.info("Retrieved snapshot {} from {}", index, address);
+      return snapshotMetadata.getSnapshotIndex();
+    } catch (Exception e) {
+      client.disconnect();
+      LOG.warn("Failed to download snapshot {} from {}", index, address);
+      LOG.debug("Download failure error", e);
+      return RaftLog.INVALID_LOG_INDEX;
+    } finally {
+      FileUtils.deleteQuietly(mStorage.getTmpDir());
+    }
+  }
+
+  @Override
+  public void close() {
+    mClients.values().forEach(AbstractClient::close);
+  }
+
+  private TermIndex toTermIndex(SnapshotMetadata metadata) {
+    return TermIndex.valueOf(metadata.getSnapshotTerm(), metadata.getSnapshotIndex());
+  }
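A sketch of the DirectoryMarshaller round trip the download path relies on: write serializes a snapshot directory to a single stream and returns its uncompressed size, and read extracts it again. All paths below are hypothetical, and whether read creates the target directory itself is an assumption, so it is created explicitly here.

package alluxio.master.journal.raft;

import alluxio.util.compression.DirectoryMarshaller;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public final class MarshallerRoundTrip {
  public static void main(String[] args) throws Exception {
    Path snapshotDir = Paths.get("/tmp/snapshot-src");   // hypothetical snapshot directory
    Path archive = Paths.get("/tmp/snapshot.bin");       // what travels over the wire
    Path restored = Paths.get("/tmp/snapshot-restored"); // extraction target

    DirectoryMarshaller marshaller = DirectoryMarshaller.Factory.create();
    try (FileOutputStream out = new FileOutputStream(archive.toFile())) {
      long rawBytes = marshaller.write(snapshotDir, out); // returns the uncompressed size
      System.out.println("marshalled " + rawBytes + " raw bytes");
    }
    Files.createDirectories(restored); // created up front; read() may expect it to exist
    try (FileInputStream in = new FileInputStream(archive.toFile())) {
      marshaller.read(restored, in);
    }
  }
}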
+
+  static class SnapshotGrpcInputStream extends InputStream {
+    private final Iterator<SnapshotData> mIt;
+    private long mTotalBytesRead = 0;
+    // using a read-only ByteBuffer avoids an array copy
+    private ByteBuffer mCurrentBuffer = ByteBuffer.allocate(0);
+
+    public SnapshotGrpcInputStream(Iterator<SnapshotData> iterator) {
+      mIt = iterator;
+    }
+
+    @Override
+    public int read() {
+      if (!mCurrentBuffer.hasRemaining()) {
+        if (!mIt.hasNext()) {
+          return -1;
+        }
+        mCurrentBuffer = mIt.next().getChunk().asReadOnlyByteBuffer();
+        LOG.debug("Received chunk of size {}: {}", mCurrentBuffer.capacity(), mCurrentBuffer);
+        mTotalBytesRead += mCurrentBuffer.capacity();
+      }
+      return Byte.toUnsignedInt(mCurrentBuffer.get());
+    }
+
+    public long totalBytes() {
+      return mTotalBytesRead;
+    }
+  }
+}
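SnapshotGrpcInputStream can be exercised against any iterator of SnapshotData, not just a live gRPC response stream, which shows that chunk boundaries are invisible to the reader. A sketch with an in-memory iterator (the demo class name is hypothetical):

package alluxio.master.journal.raft;

import alluxio.grpc.SnapshotData;

import com.google.protobuf.ByteString;

import java.util.Arrays;
import java.util.Iterator;

public final class ChunkedReadDemo {
  public static void main(String[] args) throws Exception {
    // two chunks standing in for the gRPC response stream
    Iterator<SnapshotData> chunks = Arrays.asList(
        SnapshotData.newBuilder().setChunk(ByteString.copyFromUtf8("hel")).build(),
        SnapshotData.newBuilder().setChunk(ByteString.copyFromUtf8("lo")).build()).iterator();
    try (RaftSnapshotManager.SnapshotGrpcInputStream in =
        new RaftSnapshotManager.SnapshotGrpcInputStream(chunks)) {
      StringBuilder sb = new StringBuilder();
      int b;
      while ((b = in.read()) != -1) {
        sb.append((char) b);
      }
      System.out.println(sb); // hello
    }
  }
}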
diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorage.java b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorage.java
new file mode 100644
index 000000000000..1fcc21826971
--- /dev/null
+++ b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorage.java
@@ -0,0 +1,162 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.journal.raft;
+
+import alluxio.annotation.SuppressFBWarnings;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.filefilter.NotFileFilter;
+import org.apache.commons.io.filefilter.SuffixFileFilter;
+import org.apache.commons.io.filefilter.TrueFileFilter;
+import org.apache.ratis.io.MD5Hash;
+import org.apache.ratis.server.protocol.TermIndex;
+import org.apache.ratis.server.storage.FileInfo;
+import org.apache.ratis.server.storage.RaftStorage;
+import org.apache.ratis.statemachine.SnapshotInfo;
+import org.apache.ratis.statemachine.SnapshotRetentionPolicy;
+import org.apache.ratis.statemachine.StateMachineStorage;
+import org.apache.ratis.statemachine.impl.FileListSnapshotInfo;
+import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage;
+import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo;
+import org.apache.ratis.util.MD5FileUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Optional;
+import java.util.regex.Matcher;
+import java.util.stream.Stream;
+import javax.annotation.Nullable;
+
+/**
+ * Simple state machine storage that can handle directories.
+ */
+public class SnapshotDirStateMachineStorage implements StateMachineStorage {
+  private static final Logger LOG = LoggerFactory.getLogger(SnapshotDirStateMachineStorage.class);
+
+  private RaftStorage mStorage;
+  @Nullable
+  private volatile SnapshotInfo mLatestSnapshotInfo = null;
+  private volatile boolean mNewSnapshotTaken = false;
+
+  private final Comparator<Path> mSnapshotPathComparator = Comparator.comparing(
+      path -> SimpleStateMachineStorage.getTermIndexFromSnapshotFile(path.toFile()));
+
+  /**
+   * @param path to evaluate
+   * @return a matcher to evaluate if the leaf of the provided path has a name that matches the
+   *         pattern of snapshot directories
+   */
+  @SuppressFBWarnings(value = "NP_NULL_ON_SOME_PATH_FROM_RETURN_VALUE",
+      justification = "argument 'path' is never null, and method 'matcher' returns NotNull")
+  public static Matcher matchSnapshotPath(Path path) {
+    return SimpleStateMachineStorage.SNAPSHOT_REGEX.matcher(path.getFileName().toString());
+  }
+
+  @Override
+  public void init(RaftStorage raftStorage) throws IOException {
+    mStorage = raftStorage;
+    loadLatestSnapshot();
+  }
+
+  private SnapshotInfo findLatestSnapshot() {
+    try (Stream<Path> stream = Files.list(getSnapshotDir().toPath())) {
+      Optional<Path> max = stream
+          .filter(path -> matchSnapshotPath(path).matches())
+          .max(mSnapshotPathComparator);
+      if (max.isPresent()) {
+        TermIndex ti = SimpleStateMachineStorage.getTermIndexFromSnapshotFile(max.get().toFile());
+        // for backwards compatibility with previous versions of snapshots
+        if (max.get().toFile().isFile()) {
+          MD5Hash md5Hash = MD5FileUtil.readStoredMd5ForFile(max.get().toFile());
+          FileInfo fileInfo = new FileInfo(max.get(), md5Hash);
+          return new SingleFileSnapshotInfo(fileInfo, ti.getTerm(), ti.getIndex());
+        }
+        // new snapshot format
+        List<FileInfo> fileInfos = new ArrayList<>();
+        Collection<File> nonMd5Files = FileUtils.listFiles(max.get().toFile(),
+            new NotFileFilter(new SuffixFileFilter(MD5FileUtil.MD5_SUFFIX)),
+            TrueFileFilter.INSTANCE);
+        for (File file : nonMd5Files) {
+          MD5Hash md5Hash = MD5FileUtil.readStoredMd5ForFile(file); // null if no md5 file
+          Path relativePath = max.get().relativize(file.toPath());
+          fileInfos.add(new FileInfo(relativePath, md5Hash));
+        }
+        return new FileListSnapshotInfo(fileInfos, ti.getTerm(), ti.getIndex());
+      }
+    } catch (Exception e) {
+      // Files.list may throw an unchecked exception
+      // do nothing and return null
+      LOG.warn("Error reading snapshot directory", e);
+    }
+    return null;
+  }
+
+  /**
+   * Loads the latest snapshot information into the StateMachineStorage.
+   */
+  public void loadLatestSnapshot() {
+    mLatestSnapshotInfo = findLatestSnapshot();
+  }
+
+  @Override @Nullable
+  public SnapshotInfo getLatestSnapshot() {
+    return mLatestSnapshotInfo;
+  }
+
+  @Override
+  public void format() throws IOException {}
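The storage reuses Ratis' snapshot naming convention, so a snapshot for term 2 and index 4000 lives under a directory named snapshot.2_4000, and the same helpers parse the TermIndex back out. A sketch (hypothetical class name; the /journal path is illustrative):

package alluxio.master.journal.raft;

import org.apache.ratis.server.protocol.TermIndex;
import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage;

import java.io.File;
import java.nio.file.Paths;

public final class SnapshotNamingDemo {
  public static void main(String[] args) {
    String name = SimpleStateMachineStorage.getSnapshotFileName(2, 4000);
    System.out.println(name); // snapshot.2_4000
    boolean matches = SnapshotDirStateMachineStorage
        .matchSnapshotPath(Paths.get("/journal", name)).matches();
    TermIndex ti = SimpleStateMachineStorage.getTermIndexFromSnapshotFile(
        new File("/journal", name));
    System.out.println(matches + " -> " + ti); // true -> (t:2, i:4000)
  }
}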
+
+  /**
+   * Signals to the StateMachineStorage that a new snapshot was taken.
+   */
+  public void signalNewSnapshot() {
+    mNewSnapshotTaken = true;
+  }
+
+  @Override
+  public void cleanupOldSnapshots(SnapshotRetentionPolicy retentionPolicy) throws IOException {
+    if (!mNewSnapshotTaken) {
+      LOG.trace("No new snapshot was taken; skipping cleanup of old snapshots");
+      return;
+    }
+    mNewSnapshotTaken = false;
+    try (Stream<Path> stream = Files.list(getSnapshotDir().toPath())) {
+      stream.filter(path -> matchSnapshotPath(path).matches())
+          .sorted(Collections.reverseOrder(mSnapshotPathComparator))
+          .skip(retentionPolicy.getNumSnapshotsRetained())
+          .forEach(path -> {
+            LOG.debug("removing dir {}", path.getFileName());
+            boolean b = FileUtils.deleteQuietly(path.toFile());
+            LOG.debug("{}successful deletion", b ? "" : "un");
+          });
+    }
+  }
+
+  @Override
+  public File getSnapshotDir() {
+    return mStorage.getStorageDir().getStateMachineDir();
+  }
+
+  @Override
+  public File getTmpDir() {
+    return mStorage.getStorageDir().getTmpDir();
+  }
+}
diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDownloader.java b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDownloader.java
deleted file mode 100644
index a632aaef38c9..000000000000
--- a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDownloader.java
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
- * (the "License"). You may not use this work except in compliance with the License, which is
- * available at www.apache.org/licenses/LICENSE-2.0
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied, as more fully set forth in the License.
- *
- * See the NOTICE file distributed with this work for information regarding copyright ownership.
- */
-
-package alluxio.master.journal.raft;
-
-import alluxio.grpc.DownloadSnapshotPRequest;
-import alluxio.grpc.DownloadSnapshotPResponse;
-import alluxio.grpc.SnapshotData;
-import alluxio.grpc.UploadSnapshotPRequest;
-import alluxio.grpc.UploadSnapshotPResponse;
-
-import io.grpc.stub.ClientCallStreamObserver;
-import io.grpc.stub.ClientResponseObserver;
-import io.grpc.stub.StreamObserver;
-import org.apache.ratis.io.MD5Hash;
-import org.apache.ratis.server.protocol.TermIndex;
-import org.apache.ratis.server.storage.FileInfo;
-import org.apache.ratis.statemachine.SnapshotInfo;
-import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage;
-import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo;
-import org.apache.ratis.util.MD5FileUtil;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.concurrent.CompletableFuture;
-import java.util.function.Function;
-
-/**
- * A stream observer for downloading a snapshot.
- *
- * @param <S> type of the message to send
- * @param <R> type of the message to receive
- */
-public class SnapshotDownloader<S, R> implements ClientResponseObserver<S, R> {
-  private static final Logger LOG = LoggerFactory.getLogger(SnapshotDownloader.class);
-
-  private final SimpleStateMachineStorage mStorage;
-  private final CompletableFuture<TermIndex> mFuture = new CompletableFuture<>();
-  private final Function<Long, S> mMessageBuilder;
-  private final Function<R, SnapshotData> mDataGetter;
-  private final String mSource;
-
-  /** The term and index for the latest journal entry included in the snapshot.
*/ - private TermIndex mTermIndex; - private File mTempFile; - private FileOutputStream mOutputStream; - private long mBytesWritten = 0; - private StreamObserver mStream; - private SnapshotInfo mSnapshotToInstall; - - /** - * Builds a stream for leader to download a snapshot. - * - * @param storage the snapshot storage - * @param stream the response stream - * @param source the source of the snapshot - * @return the download stream for leader - */ - public static SnapshotDownloader forLeader( - SimpleStateMachineStorage storage, StreamObserver stream, - String source) { - return new SnapshotDownloader<>(storage, - offset -> UploadSnapshotPResponse.newBuilder().setOffsetReceived(offset).build(), - UploadSnapshotPRequest::getData, stream, source); - } - - /** - * Builds a stream for follower to download a snapshot. - * - * @param storage the snapshot storage - * @param source the source of the snapshot - * @return the download stream for follower - */ - public static SnapshotDownloader - forFollower(SimpleStateMachineStorage storage, String source) { - return new SnapshotDownloader<>(storage, - offset -> DownloadSnapshotPRequest.newBuilder().setOffsetReceived(offset).build(), - DownloadSnapshotPResponse::getData, null, source); - } - - private SnapshotDownloader(SimpleStateMachineStorage storage, Function messageBuilder, - Function dataGetter, StreamObserver stream, String source) { - mStorage = storage; - mMessageBuilder = messageBuilder; - mDataGetter = dataGetter; - mStream = stream; - mSource = source; - } - - @Override - public void onNext(R response) { - try { - onNextInternal(response); - } catch (Exception e) { - mStream.onError(e); - mFuture.completeExceptionally(e); - cleanup(); - } - } - - private void cleanup() { - if (mOutputStream != null) { - try { - mOutputStream.close(); - } catch (IOException ioException) { - LOG.error("Error closing snapshot file {}", mTempFile, ioException); - } - } - if (mTempFile != null && !mTempFile.delete()) { - LOG.error("Error deleting snapshot file {}", mTempFile.getPath()); - } - } - - private void onNextInternal(R response) throws IOException { - TermIndex termIndex = TermIndex.valueOf( - mDataGetter.apply(response).getSnapshotTerm(), - mDataGetter.apply(response).getSnapshotIndex()); - if (mTermIndex == null) { - LOG.info("Downloading new snapshot {} from {}", termIndex, mSource); - mTermIndex = termIndex; - // start a new file - mTempFile = RaftJournalUtils.createTempSnapshotFile(mStorage); - - mTempFile.deleteOnExit(); - mStream.onNext(mMessageBuilder.apply(0L)); - } else { - if (!termIndex.equals(mTermIndex)) { - throw new IOException(String.format( - "Mismatched term index when downloading the snapshot. 
expected: %s actual: %s", - mTermIndex, termIndex)); - } - if (!mDataGetter.apply(response).hasChunk()) { - throw new IOException(String.format( - "A chunk for file %s is missing from the response %s.", mTempFile, response)); - } - // write the chunk - if (mOutputStream == null) { - LOG.info("Start writing to temporary file {}", mTempFile.getPath()); - mOutputStream = new FileOutputStream(mTempFile); - } - long position = mOutputStream.getChannel().position(); - if (position != mDataGetter.apply(response).getOffset()) { - throw new IOException( - String.format("Mismatched offset in file %d, expect %d, bytes written %d", - position, mDataGetter.apply(response).getOffset(), mBytesWritten)); - } - mOutputStream.write(mDataGetter.apply(response).getChunk().toByteArray()); - mBytesWritten += mDataGetter.apply(response).getChunk().size(); - LOG.debug("Written {} bytes to snapshot file {}", mBytesWritten, mTempFile.getPath()); - if (mDataGetter.apply(response).getEof()) { - LOG.debug("Completed writing to temporary file {} with size {}", - mTempFile.getPath(), mOutputStream.getChannel().position()); - mOutputStream.close(); - mOutputStream = null; - final MD5Hash digest = MD5FileUtil.computeMd5ForFile(mTempFile); - mSnapshotToInstall = new SingleFileSnapshotInfo( - new FileInfo(mTempFile.toPath(), digest), - mTermIndex.getTerm(), mTermIndex.getIndex()); - mFuture.complete(mTermIndex); - LOG.info("Finished copying snapshot to local file {}.", mTempFile); - mStream.onCompleted(); - } else { - mStream.onNext(mMessageBuilder.apply(mBytesWritten)); - } - } - } - - @Override - public void onError(Throwable t) { - mFuture.completeExceptionally(t); - cleanup(); - } - - @Override - public void onCompleted() { - if (mOutputStream != null) { - mFuture.completeExceptionally( - new IllegalStateException("Request completed with unfinished upload")); - cleanup(); - } - } - - @Override - public void beforeStart(ClientCallStreamObserver requestStream) { - mStream = requestStream; - } - - /** - * @return a future that tracks when the stream is completed - */ - public CompletableFuture getFuture() { - return mFuture; - } - - /** - * @return the snapshot information if it is downloaded completely, or null otherwise - */ - public SnapshotInfo getSnapshotToInstall() { - return mSnapshotToInstall; - } -} diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotIdJournaled.java b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotIdJournaled.java new file mode 100644 index 000000000000..fc8457d4b966 --- /dev/null +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotIdJournaled.java @@ -0,0 +1,30 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal.raft; + +import alluxio.master.journal.SingleEntryJournaled; +import alluxio.master.journal.checkpoint.CheckpointName; +import alluxio.proto.journal.Journal; + +/** + * Simple implementation to write and recover the snapshot ID when checkpointing. 
The snapshot ID + * is a long that represents the sequence number of the last entry that was processed by the + * journal. The snapshot ID will be inserted into and retrieved through the + * {@link Journal.JournalEntry.Builder#setSequenceNumber(long)} and + * {@link Journal.JournalEntry.Builder#getSequenceNumber()} methods, respectively. + */ +public class SnapshotIdJournaled extends SingleEntryJournaled { + @Override + public CheckpointName getCheckpointName() { + return CheckpointName.SNAPSHOT_ID; + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotReplicationManager.java b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotReplicationManager.java deleted file mode 100644 index d999d0494024..000000000000 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotReplicationManager.java +++ /dev/null @@ -1,619 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.journal.raft; - -import alluxio.ClientContext; -import alluxio.Constants; -import alluxio.collections.Pair; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.exception.status.AbortedException; -import alluxio.exception.status.AlluxioStatusException; -import alluxio.exception.status.NotFoundException; -import alluxio.grpc.DownloadSnapshotPRequest; -import alluxio.grpc.DownloadSnapshotPResponse; -import alluxio.grpc.GetSnapshotInfoRequest; -import alluxio.grpc.GetSnapshotInfoResponse; -import alluxio.grpc.GetSnapshotRequest; -import alluxio.grpc.JournalQueryRequest; -import alluxio.grpc.JournalQueryResponse; -import alluxio.grpc.QuorumServerState; -import alluxio.grpc.SnapshotData; -import alluxio.grpc.SnapshotMetadata; -import alluxio.grpc.UploadSnapshotPRequest; -import alluxio.grpc.UploadSnapshotPResponse; -import alluxio.master.MasterClientContext; -import alluxio.metrics.MetricKey; -import alluxio.metrics.MetricsSystem; -import alluxio.resource.LockResource; -import alluxio.security.authentication.ClientIpAddressInjector; -import alluxio.util.FormatUtils; -import alluxio.util.LogUtils; -import alluxio.util.logging.SamplingLogger; - -import com.codahale.metrics.Timer; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.protobuf.MessageLite; -import io.grpc.Status; -import io.grpc.stub.StreamObserver; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientReply; -import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.server.protocol.TermIndex; -import org.apache.ratis.server.raftlog.RaftLog; -import org.apache.ratis.server.storage.FileInfo; -import org.apache.ratis.statemachine.SnapshotInfo; -import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; -import org.apache.ratis.thirdparty.com.google.protobuf.UnsafeByteOperations; -import org.apache.ratis.util.MD5FileUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import 
java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.time.Duration; -import java.time.Instant; -import java.util.Map; -import java.util.Objects; -import java.util.PriorityQueue; -import java.util.concurrent.CancellationException; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; -import java.util.concurrent.locks.Condition; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; -import java.util.function.Function; -import java.util.stream.Collectors; - -/** - * Class for managing snapshot replication among masters. - * It manages two snapshot replication workflows - worker to master and master to worker. - * - * 1. Worker to Master - * When a raft leader needs a snapshot, instead of taking snapshot locally it copies a recent - * snapshot from one of the followers. - * - * Workflow: - * - * - Ratis calls leader state machine to take a snapshot - * - leader gets snapshot metadata from follower - * - leader pick one of the follower and send a request for copying the snapshot - * - follower receives the request and calls the leader raft journal service to upload the snapshot - * - after the upload completes, leader remembers the temporary snapshot location and index - * - Ratis calls the leader state machine again to take a snapshot - * - leader moves the temporary snapshot to the journal snapshot folder and returns snapshot index - * - * 2. Master to Worker - * When a raft follower receives a notification to download a snapshot, it downloads the latest - * snapshot from the leader. - * - * Workflow: - * - * - Ratis leader determines one of the follower needs a snapshot because it misses journal entries - * from a long time ago - * - Ratis leader notifies Ratis follower to install a snapshot from leader, the follower calls the - * Alluxio state machine to fulfill this request - * - the follower state machine calls the snapshot manager which calls the raft journal service from - * leader to download a snapshot - * - after the downloads completes, follower moves the file to snapshot directory and gives Ratis - * the snapshot index - */ -public class SnapshotReplicationManager { - private static final Logger LOG = LoggerFactory.getLogger(SnapshotReplicationManager.class); - private static final Logger SAMPLING_LOG = new SamplingLogger(LOG, 5L * Constants.SECOND_MS); - - private final SimpleStateMachineStorage mStorage; - private final RaftJournalSystem mJournalSystem; - private volatile SnapshotInfo mDownloadedSnapshot; - private final PriorityQueue> mSnapshotCandidates; - private Future mRequestDataFuture; - private final Lock mRequestDataLock = new ReentrantLock(); - private final Condition mRequestDataCondition = mRequestDataLock.newCondition(); - private final ExecutorService mRequestDataExecutor = Executors.newSingleThreadExecutor(); - - private static final long SNAPSHOT_INFO_TIMEOUT_MS = - Configuration.getMs(PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT); - private static final long SNAPSHOT_DATA_TIMEOUT_MS = - Configuration.getMs(PropertyKey.MASTER_JOURNAL_REQUEST_DATA_TIMEOUT); - - private enum DownloadState { - /** No snapshot download is in progress. */ - IDLE, - - /** Snapshot information is requested from available followers. 
*/ - REQUEST_INFO, - - /** The latest snapshot data is requested from one of the followers. */ - REQUEST_DATA, - - /** The latest snapshot is being downloaded from one of the followers. */ - STREAM_DATA, - - /** A snapshot is downloaded and ready for installation. */ - DOWNLOADED, - - /** A snapshot is being installed to the journal storage. */ - INSTALLING, - } - - private final AtomicReference mDownloadState = - new AtomicReference<>(DownloadState.IDLE); - - /** - * @param journalSystem the raft journal system - * @param storage the snapshot storage - */ - public SnapshotReplicationManager(RaftJournalSystem journalSystem, - SimpleStateMachineStorage storage) { - mStorage = storage; - mJournalSystem = journalSystem; - mSnapshotCandidates = new PriorityQueue<>((pair1, pair2) -> { - SnapshotMetadata first = pair1.getFirst(); - SnapshotMetadata second = pair2.getFirst(); - // deliberately reversing the compare order to have bigger numbers rise to the top - // bigger terms and indexes means a more recent snapshot - if (first.getSnapshotTerm() == second.getSnapshotTerm()) { - return Long.compare(second.getSnapshotIndex(), first.getSnapshotIndex()); - } - return Long.compare(second.getSnapshotTerm(), first.getSnapshotTerm()); - }); - } - - /** - * Downloads and installs a snapshot from the leader. - * - * @return a future with the term index of the installed snapshot - */ - public CompletableFuture installSnapshotFromLeader() { - if (mJournalSystem.isLeader()) { - return RaftJournalUtils.completeExceptionally( - new IllegalStateException("Abort snapshot installation after becoming a leader")); - } - if (!transitionState(DownloadState.IDLE, DownloadState.STREAM_DATA)) { - return RaftJournalUtils.completeExceptionally( - new IllegalStateException("State is not IDLE when starting a snapshot installation")); - } - try { - RaftJournalServiceClient client = createJournalServiceClient(); - String address = String.valueOf(client.getRemoteSockAddress()); - SnapshotDownloader observer = - SnapshotDownloader.forFollower(mStorage, address); - Timer.Context ctx = MetricsSystem - .timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_TIMER.getName()).time(); - client.downloadSnapshot(observer); - return observer.getFuture().thenApplyAsync((termIndex) -> { - ctx.close(); - mDownloadedSnapshot = observer.getSnapshotToInstall(); - transitionState(DownloadState.STREAM_DATA, DownloadState.DOWNLOADED); - long index = installDownloadedSnapshot(); - if (index == RaftLog.INVALID_LOG_INDEX) { - throw new CompletionException(new RuntimeException( - String.format("Failed to install the downloaded snapshot %s", termIndex))); - } - if (index != termIndex.getIndex()) { - throw new CompletionException(new IllegalStateException( - String.format("Mismatched snapshot installed - downloaded %d, installed %d", - termIndex.getIndex(), index))); - } - return termIndex; - }).whenComplete((termIndex, throwable) -> { - if (throwable != null) { - LOG.error("Unexpected exception downloading snapshot from leader {}.", address, - throwable); - transitionState(DownloadState.STREAM_DATA, DownloadState.IDLE); - } - client.close(); - }); - } catch (Exception e) { - transitionState(DownloadState.STREAM_DATA, DownloadState.IDLE); - return RaftJournalUtils.completeExceptionally(e); - } - } - - /** - * Sends a snapshot to the leader. 
- * - * @throws IOException if error occurs while initializing the data stream - */ - public void sendSnapshotToLeader() throws IOException { - if (mJournalSystem.isLeader()) { - throw new IllegalStateException("Server is no longer a follower"); - } - LOG.debug("Checking latest snapshot to send"); - SnapshotInfo snapshot = mStorage.getLatestSnapshot(); - if (snapshot == null) { - throw new NotFoundException("No snapshot available"); - } - - SnapshotUploader snapshotUploader = - SnapshotUploader.forFollower(mStorage, snapshot); - RaftJournalServiceClient client = createJournalServiceClient(); - LOG.info("Sending stream request to leader {} for snapshot {}", client.getRemoteSockAddress(), - snapshot.getTermIndex()); - StreamObserver requestObserver = - client.uploadSnapshot(snapshotUploader); - requestObserver.onNext(UploadSnapshotPRequest.newBuilder() - .setData(SnapshotData.newBuilder() - .setSnapshotTerm(snapshot.getTerm()) - .setSnapshotIndex(snapshot.getIndex()) - .setOffset(0)) - .build()); - snapshotUploader.getCompletionFuture().whenComplete((info, t) -> client.close()); - } - - /** - * Attempts to copy a snapshot from one of the followers. - * - * The leader state machine calls this method regularly when it needs a new snapshot. - * To avoid blocking normal journal operations, This method always returns a value immediately - * without waiting for download to finish: - * - * - If no download is in progress, it schedules a new download asynchronously and returns - * {@link RaftLog#INVALID_LOG_INDEX}. - * - If a download is in progress, it returns {@link RaftLog#INVALID_LOG_INDEX} immediately. - * - If a download is completed, it moves the downloaded file to the snapshot directory and - * returns the snapshot index. - * - * @return the index of the downloaded snapshot, or {@link RaftLog#INVALID_LOG_INDEX} - * if no snapshot is installed. - */ - public long maybeCopySnapshotFromFollower() { - if (mDownloadState.get() == DownloadState.DOWNLOADED) { - return installDownloadedSnapshot(); - } - SAMPLING_LOG.info("Call copy snapshot from follower in state {}", mDownloadState.get()); - if (mDownloadState.get() == DownloadState.IDLE) { - CompletableFuture.runAsync(this::requestSnapshotFromFollowers); - } - return RaftLog.INVALID_LOG_INDEX; - } - - /** - * Receives a snapshot from follower. 
- * - * @param responseStreamObserver the response stream observer - * @return the request stream observer - */ - public StreamObserver receiveSnapshotFromFollower( - StreamObserver responseStreamObserver) { - String followerIp = ClientIpAddressInjector.getIpAddress(); - LOG.info("Received upload snapshot request from follower {}", followerIp); - - SnapshotDownloader observer = - SnapshotDownloader.forLeader(mStorage, responseStreamObserver, - followerIp); - if (!transitionState(DownloadState.REQUEST_DATA, DownloadState.STREAM_DATA)) { - responseStreamObserver.onCompleted(); - return observer; - } - observer.getFuture() - .thenApply(termIndex -> { - try (LockResource ignored = new LockResource(mRequestDataLock)) { - mDownloadedSnapshot = observer.getSnapshotToInstall(); - transitionState(DownloadState.STREAM_DATA, DownloadState.DOWNLOADED); - // Cancel any pending data requests since the download was successful - mRequestDataFuture.cancel(true); - mRequestDataCondition.signalAll(); - return termIndex; - } - }).exceptionally(e -> { - try (LockResource ignored = new LockResource(mRequestDataLock)) { - LOG.error("Unexpected exception downloading snapshot from follower {}.", followerIp, e); - // this allows the leading master to request other followers for their snapshots. It - // previously collected information about other snapshots in requestInfo(). If no other - // snapshots are available requestData() will return false and mDownloadState will be - // IDLE - transitionState(DownloadState.STREAM_DATA, DownloadState.REQUEST_DATA); - // Notify the request data tasks to start a request with a new candidate - mRequestDataCondition.signalAll(); - return null; - } - }); - return observer; - } - - /** - * Handles snapshot requests. - * - * @param queryRequest the query request - * @return the response message, or null if the request is not handled - * @throws IOException if any error occurred while handling the request - */ - public Message handleRequest(JournalQueryRequest queryRequest) throws IOException { - if (queryRequest.hasSnapshotInfoRequest()) { - SnapshotMetadata requestSnapshot = queryRequest.getSnapshotInfoRequest().getSnapshotInfo(); - Instant start = Instant.now(); - SnapshotInfo latestSnapshot = mStorage.getLatestSnapshot(); - synchronized (this) { - // We may need to wait for a valid snapshot to be ready - while ((latestSnapshot == null - || (queryRequest.getSnapshotInfoRequest().hasSnapshotInfo() - && (requestSnapshot.getSnapshotTerm() > latestSnapshot.getTerm() - || (requestSnapshot.getSnapshotTerm() == latestSnapshot.getTerm() - && requestSnapshot.getSnapshotIndex() >= latestSnapshot.getIndex())))) - && Duration.between(start, Instant.now()).toMillis() < SNAPSHOT_INFO_TIMEOUT_MS) { - LOG.info("Received snapshot info request from leader - {}, but do not have a " - + "snapshot ready - {}", requestSnapshot, latestSnapshot); - try { - wait(SNAPSHOT_DATA_TIMEOUT_MS - Long.min(SNAPSHOT_DATA_TIMEOUT_MS, - Math.abs(Duration.between(start, Instant.now()).toMillis()))); - } catch (InterruptedException e) { - LOG.debug("Interrupted while waiting for snapshot", e); - break; - } - latestSnapshot = mStorage.getLatestSnapshot(); - } - } - if (latestSnapshot == null) { - LOG.debug("No snapshot to send"); - return toMessage(GetSnapshotInfoResponse.getDefaultInstance()); - } - JournalQueryResponse response = JournalQueryResponse.newBuilder() - .setSnapshotInfoResponse(GetSnapshotInfoResponse.newBuilder().setLatest( - toSnapshotMetadata(latestSnapshot.getTermIndex()))) - .build(); - LOG.info("Sent 
snapshot info response to leader {}", response); - return toMessage(response); - } - if (queryRequest.hasSnapshotRequest()) { - LOG.info("Start sending snapshot to leader"); - sendSnapshotToLeader(); - return Message.EMPTY; - } - return null; - } - - /** - * Sends a snapshot to a follower. - * - * @param responseObserver the response stream observer - * @return the request stream observer - */ - public StreamObserver sendSnapshotToFollower( - StreamObserver responseObserver) { - SnapshotInfo snapshot = mStorage.getLatestSnapshot(); - LOG.debug("Received snapshot download request from {}", ClientIpAddressInjector.getIpAddress()); - SnapshotUploader requestStreamObserver = - SnapshotUploader.forLeader(mStorage, snapshot, responseObserver); - if (snapshot == null) { - responseObserver.onError(Status.NOT_FOUND - .withDescription("Cannot find a valid snapshot to download.") - .asException()); - return requestStreamObserver; - } - responseObserver.onNext(DownloadSnapshotPResponse.newBuilder() - .setData(SnapshotData.newBuilder() - .setSnapshotTerm(snapshot.getTerm()) - .setSnapshotIndex(snapshot.getIndex()) - .setOffset(0)) - .build()); - return requestStreamObserver; - } - - private static Message toMessage(MessageLite value) { - return Message.valueOf( - UnsafeByteOperations.unsafeWrap(value.toByteString().asReadOnlyByteBuffer())); - } - - private SnapshotMetadata toSnapshotMetadata(TermIndex value) { - return value == null ? null : - SnapshotMetadata.newBuilder() - .setSnapshotTerm(value.getTerm()) - .setSnapshotIndex(value.getIndex()) - .build(); - } - - private boolean transitionState(DownloadState expected, DownloadState update) { - if (!mDownloadState.compareAndSet(expected, update)) { - LOG.warn("Failed to transition from {} to {}: current state is {}", - expected, update, mDownloadState.get()); - return false; - } - LOG.debug("Successfully transitioned from {} to {}", expected, update); - return true; - } - - /** - * Installs a downloaded snapshot in the journal snapshot directory. - * - * @return the index of the installed snapshot - */ - private long installDownloadedSnapshot() { - LOG.info("Call install downloaded snapshot"); - if (!transitionState(DownloadState.DOWNLOADED, DownloadState.INSTALLING)) { - return RaftLog.INVALID_LOG_INDEX; - } - File tempFile = null; - try (Timer.Context ctx = MetricsSystem - .timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_INSTALL_TIMER.getName()).time()) { - SnapshotInfo snapshot = mDownloadedSnapshot; - if (snapshot == null) { - throw new IllegalStateException("Snapshot is not completed"); - } - FileInfo fileInfo = snapshot.getFiles().get(0); - tempFile = fileInfo.getPath().toFile(); - if (!tempFile.exists()) { - throw new FileNotFoundException(String.format("Snapshot file %s is not found", tempFile)); - } - SnapshotInfo latestSnapshot = mStorage.getLatestSnapshot(); - TermIndex lastInstalled = latestSnapshot == null ? 
null : latestSnapshot.getTermIndex(); - TermIndex downloaded = snapshot.getTermIndex(); - if (lastInstalled != null && downloaded.compareTo(lastInstalled) < 0) { - throw new AbortedException( - String.format("Snapshot to be installed %s is older than current snapshot %s", - downloaded, lastInstalled)); - } - final File snapshotFile = mStorage.getSnapshotFile( - downloaded.getTerm(), downloaded.getIndex()); - LOG.debug("Moving temp snapshot {} to file {}", tempFile, snapshotFile); - MD5FileUtil.saveMD5File(snapshotFile, fileInfo.getFileDigest()); - if (!tempFile.renameTo(snapshotFile)) { - throw new IOException(String.format("Failed to rename %s to %s", tempFile, snapshotFile)); - } - synchronized (this) { - mStorage.loadLatestSnapshot(); - notifyAll(); - } - LOG.info("Completed storing snapshot at {} to file {} with size {}", downloaded, - snapshotFile, FormatUtils.getSizeFromBytes(snapshotFile.length())); - return downloaded.getIndex(); - } catch (Exception e) { - LOG.error("Failed to install snapshot", e); - if (tempFile != null) { - tempFile.delete(); - } - return RaftLog.INVALID_LOG_INDEX; - } finally { - transitionState(DownloadState.INSTALLING, DownloadState.IDLE); - } - } - - /** - * Finds a follower with the latest snapshot and sends a request to download it. - */ - private void requestSnapshotFromFollowers() { - if (mDownloadState.get() == DownloadState.IDLE) { - if (!transitionState(DownloadState.IDLE, DownloadState.REQUEST_INFO)) { - return; - } - // we want fresh info not polluted by older requests. This ensures that requestData() requests - // from at most # followers before requesting new info. Otherwise, the candidate queue might - // grow indefinitely. - mSnapshotCandidates.clear(); - requestInfo(); - transitionState(DownloadState.REQUEST_INFO, DownloadState.REQUEST_DATA); - mRequestDataFuture = mRequestDataExecutor.submit(this::requestData, null); - } - } - - private void requestInfo() { - Preconditions.checkState(mDownloadState.get() == DownloadState.REQUEST_INFO); - try { - LOG.info("Call request snapshot info from followers"); - SingleFileSnapshotInfo latestSnapshot = mStorage.getLatestSnapshot(); - SnapshotMetadata snapshotMetadata = latestSnapshot == null ? 
null : - SnapshotMetadata.newBuilder() - .setSnapshotTerm(latestSnapshot.getTerm()) - .setSnapshotIndex(latestSnapshot.getIndex()) - .build(); - // build SnapshotInfoRequests - GetSnapshotInfoRequest infoRequest; - if (snapshotMetadata == null) { - infoRequest = GetSnapshotInfoRequest.getDefaultInstance(); - } else { - infoRequest = GetSnapshotInfoRequest.newBuilder() - .setSnapshotInfo(snapshotMetadata).build(); - } - Map> jobs = mJournalSystem - .getQuorumServerInfoList() - .stream() - .filter(server -> server.getServerState() == QuorumServerState.AVAILABLE) - .map(server -> RaftJournalUtils.getPeerId( - server.getServerAddress().getHost(), - server.getServerAddress().getRpcPort())) - .filter(peerId -> !peerId.equals(mJournalSystem.getLocalPeerId())) - .collect(Collectors.toMap(Function.identity(), - peerId -> mJournalSystem.sendMessageAsync(peerId, toMessage(JournalQueryRequest - .newBuilder() - .setSnapshotInfoRequest(infoRequest) - .build()), SNAPSHOT_INFO_TIMEOUT_MS))); - // query all secondary masters for information about their latest snapshot - for (Map.Entry> job : jobs.entrySet()) { - RaftPeerId peerId = job.getKey(); - try { - RaftClientReply reply = job.getValue().get(); - if (reply.getException() != null) { - throw reply.getException(); - } - JournalQueryResponse response = JournalQueryResponse.parseFrom( - reply.getMessage().getContent().asReadOnlyByteBuffer()); - if (!response.hasSnapshotInfoResponse()) { - throw new IOException("Invalid response for GetSnapshotInfoRequest " + response); - } - SnapshotMetadata latest = response.getSnapshotInfoResponse().getLatest(); - LOG.info("Received snapshot info from follower {} - {}, my current snapshot is {}", - peerId, latest, snapshotMetadata); - if (snapshotMetadata == null - || (latest.getSnapshotTerm() >= snapshotMetadata.getSnapshotTerm()) - && latest.getSnapshotIndex() > snapshotMetadata.getSnapshotIndex()) { - mSnapshotCandidates.add(new Pair<>(latest, peerId)); - } - } catch (Exception e) { - LOG.warn("Error while requesting snapshot info from {}: {}", peerId, e.toString()); - } - } - } catch (Exception e) { - LogUtils.warnWithException(LOG, "Failed to request snapshot info from followers", e); - } - } - - private void requestData() { - Preconditions.checkState(mDownloadState.get() == DownloadState.REQUEST_DATA); - // request snapshots from the most recent to the least recent - try { - while (!mSnapshotCandidates.isEmpty() && mDownloadState.get() == DownloadState.REQUEST_DATA) { - Pair candidate = mSnapshotCandidates.poll(); - SnapshotMetadata metadata = Objects.requireNonNull(candidate).getFirst(); - RaftPeerId peerId = candidate.getSecond(); - LOG.info("Request data from follower {} for snapshot (t: {}, i: {})", - peerId, metadata.getSnapshotTerm(), metadata.getSnapshotIndex()); - try { - RaftClientReply reply = mJournalSystem.sendMessageAsync(peerId, - toMessage(JournalQueryRequest.newBuilder() - .setSnapshotRequest(GetSnapshotRequest.getDefaultInstance()).build())) - .get(); - if (reply.getException() != null) { - throw reply.getException(); - } - // Wait a timeout before trying the next follower, or until we are awoken - try (LockResource ignored = new LockResource(mRequestDataLock)) { - do { - mRequestDataCondition.await(SNAPSHOT_DATA_TIMEOUT_MS, TimeUnit.MILLISECONDS); - } while (mDownloadState.get() != DownloadState.REQUEST_DATA); - } - } catch (InterruptedException | CancellationException ignored) { - // We are usually interrupted when a snapshot transfer is complete, - // so we can just return without trying a new 
candidate. - // It is fine even if we are interrupted in other cases as - // a new request info will be initiated by the next takeSnapshot() call. - return; - } catch (Exception e) { - LOG.warn("Failed to request snapshot data from {}: {}", peerId, e); - } - } - } finally { - // Ensure that we return to the IDLE state in case the REQUEST_DATA operations - // were not successful, for example if we were interrupted for some reason - // other than a successful download. - if (mDownloadState.get() == DownloadState.REQUEST_DATA) { - transitionState(DownloadState.REQUEST_DATA, DownloadState.IDLE); - } - } - } - - @VisibleForTesting - synchronized RaftJournalServiceClient createJournalServiceClient() - throws AlluxioStatusException { - RaftJournalServiceClient client = new RaftJournalServiceClient(MasterClientContext - .newBuilder(ClientContext.create(Configuration.global())).build()); - client.connect(); - return client; - } -} diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotUploader.java b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotUploader.java deleted file mode 100644 index 727f6a288b7c..000000000000 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotUploader.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.journal.raft; - -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.exception.status.InvalidArgumentException; -import alluxio.grpc.DownloadSnapshotPRequest; -import alluxio.grpc.DownloadSnapshotPResponse; -import alluxio.grpc.SnapshotData; -import alluxio.grpc.UploadSnapshotPRequest; -import alluxio.grpc.UploadSnapshotPResponse; - -import com.google.protobuf.UnsafeByteOperations; -import io.grpc.stub.ClientCallStreamObserver; -import io.grpc.stub.ClientResponseObserver; -import io.grpc.stub.StreamObserver; -import org.apache.commons.io.IOUtils; -import org.apache.ratis.statemachine.SnapshotInfo; -import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.util.concurrent.CompletableFuture; -import java.util.function.Function; - -/** - * A stream observer for uploading a snapshot. 
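The download logic above serializes its phases (IDLE, REQUEST_INFO, REQUEST_DATA, DOWNLOADED, INSTALLING) through a single atomic reference rather than a lock, so racing callers simply lose the compare-and-set and back off. A minimal, self-contained sketch of that pattern (class and enum names are illustrative, not the removed implementation):

    import java.util.concurrent.atomic.AtomicReference;

    // Illustrative sketch of the compare-and-set transition pattern used above.
    public class TransitionSketch {
      enum DownloadState { IDLE, REQUEST_INFO, REQUEST_DATA, DOWNLOADED, INSTALLING }

      private final AtomicReference<DownloadState> mState =
          new AtomicReference<>(DownloadState.IDLE);

      /** Atomically moves from expected to update; returns false if another caller won. */
      boolean transition(DownloadState expected, DownloadState update) {
        if (!mState.compareAndSet(expected, update)) {
          System.err.printf("cannot move %s -> %s, current state is %s%n",
              expected, update, mState.get());
          return false;
        }
        return true;
      }

      public static void main(String[] args) {
        TransitionSketch s = new TransitionSketch();
        // The first caller wins the IDLE -> REQUEST_INFO transition...
        System.out.println(s.transition(DownloadState.IDLE, DownloadState.REQUEST_INFO)); // true
        // ...and a stale second attempt is rejected instead of corrupting the state.
        System.out.println(s.transition(DownloadState.IDLE, DownloadState.REQUEST_INFO)); // false
      }
    }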
- * - * @param the message type to send - * @param the message type to receive - */ -public class SnapshotUploader - implements StreamObserver, ClientResponseObserver { - private static final Logger LOG = LoggerFactory.getLogger(SnapshotUploader.class); - private static final int SNAPSHOT_CHUNK_SIZE = (int) Configuration.getBytes( - PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_CHUNK_SIZE); - - private final Function mDataMessageBuilder; - private final Function mOffsetGetter; - private final File mSnapshotFile; - private final long mLength; - private final SnapshotInfo mSnapshotInfo; - private long mOffset = 0; - private StreamObserver mStream; - private final CompletableFuture mCompletionFuture = new CompletableFuture<>(); - - /** - * Builds a stream for leader to upload a snapshot. - * - * @param storage the snapshot storage - * @param snapshot the snapshot to upload - * @param stream the download stream - * @return the upload stream for leader - */ - public static SnapshotUploader forLeader( - SimpleStateMachineStorage storage, SnapshotInfo snapshot, - StreamObserver stream) { - return new SnapshotUploader<>(storage, snapshot, stream, - data -> DownloadSnapshotPResponse.getDefaultInstance().toBuilder().setData(data).build(), - DownloadSnapshotPRequest::getOffsetReceived); - } - - /** - * Builds a stream for follower to upload a snapshot. - * - * @param storage the snapshot storage - * @param snapshot the snapshot to upload - * @return the upload stream for follower - */ - public static SnapshotUploader forFollower( - SimpleStateMachineStorage storage, SnapshotInfo snapshot) { - return new SnapshotUploader<>(storage, snapshot, null, - data -> UploadSnapshotPRequest.getDefaultInstance().toBuilder().setData(data).build(), - UploadSnapshotPResponse::getOffsetReceived); - } - - private SnapshotUploader(SimpleStateMachineStorage storage, SnapshotInfo snapshot, - StreamObserver stream, - Function buildFunc, Function offsetGetter) { - mSnapshotInfo = snapshot; - mDataMessageBuilder = buildFunc; - mOffsetGetter = offsetGetter; - mSnapshotFile = storage.getSnapshotFile(snapshot.getTerm(), snapshot.getIndex()); - mLength = mSnapshotFile.length(); - mStream = stream; - } - - @Override - public void onNext(R value) { - try { - onNextInternal(value); - } catch (Exception e) { - LOG.error("Error occurred while sending snapshot", e); - mStream.onError(e); - } - } - - private void onNextInternal(R value) throws IOException { - LOG.debug("Received request {}", value); - if (mStream == null) { - throw new IllegalStateException("No request stream assigned"); - } - if (!mSnapshotFile.exists()) { - throw new FileNotFoundException( - String.format("Snapshot file %s does not exist", mSnapshotFile.getPath())); - } - long offsetReceived = mOffsetGetter.apply(value); - // TODO(feng): implement better flow control - if (mOffset != offsetReceived) { - throw new InvalidArgumentException( - String.format("Received mismatched offset: %d. 
Expect %d", offsetReceived, mOffset)); - } - LOG.debug("Streaming data at {}", mOffset); - try (InputStream is = new FileInputStream(mSnapshotFile)) { - is.skip(mOffset); - boolean eof = false; - int chunkSize = SNAPSHOT_CHUNK_SIZE; - long available = mLength - mOffset; - if (available <= SNAPSHOT_CHUNK_SIZE) { - eof = true; - chunkSize = (int) available; - } - byte[] buffer = new byte[chunkSize]; - IOUtils.readFully(is, buffer); - LOG.debug("Read {} bytes from file {}", chunkSize, mSnapshotFile); - mStream.onNext(mDataMessageBuilder.apply(SnapshotData.newBuilder() - .setOffset(mOffset) - .setEof(eof) - .setChunk(UnsafeByteOperations.unsafeWrap(buffer)) - .setSnapshotTerm(mSnapshotInfo.getTerm()) - .setSnapshotIndex(mSnapshotInfo.getIndex()) - .build())); - mOffset += chunkSize; - LOG.debug("Uploaded total {} bytes of file {}", mOffset, mSnapshotFile); - } - } - - @Override - public void onError(Throwable t) { - LOG.error("Error sending snapshot {} at {}", mSnapshotFile, mOffset, t); - mStream.onError(t); - mCompletionFuture.completeExceptionally(t); - } - - @Override - public void onCompleted() { - LOG.debug("Received onComplete for {}", mSnapshotInfo); - mStream.onCompleted(); - mCompletionFuture.complete(mSnapshotInfo); - } - - /** - * @return a future used to propagate completion status to {@link SnapshotReplicationManager} - */ - public CompletableFuture getCompletionFuture() { - return mCompletionFuture; - } - - @Override - public void beforeStart(ClientCallStreamObserver requestStream) { - mStream = requestStream; - } -} diff --git a/core/server/common/src/main/java/alluxio/master/journal/ufs/UfsJournal.java b/core/server/common/src/main/java/alluxio/master/journal/ufs/UfsJournal.java index 9daf79418e74..4a54245e55a3 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/ufs/UfsJournal.java +++ b/core/server/common/src/main/java/alluxio/master/journal/ufs/UfsJournal.java @@ -639,6 +639,10 @@ public String toString() { @Override public synchronized void close() { + if (mState.get() == State.PRIMARY && mWriter != null) { + LOG.info("Closing journal {}, state {} last journal location {}, next sequence number {}", + this, mState, mWriter.currentLogName(), mWriter.getNextSequenceNumber()); + } if (mAsyncWriter != null) { mAsyncWriter.close(); mAsyncWriter = null; @@ -652,6 +656,10 @@ public synchronized void close() { // If the tailing thread has crashed before the close, // an exception will be thrown, containing what has originally caused the crash mTailerThread.awaitTermination(false); + if (mState.get() == State.STANDBY) { + LOG.info("Closing journal {}, state {}, next sequence number {}", + this, this.mState, mTailerThread.getNextSequenceNumber()); + } } catch (Throwable t) { // We want to let the thread finish normally, however this call might throw if it already // finished exceptionally. 
We do not rethrow as we want the shutdown sequence to be smooth diff --git a/core/server/common/src/main/java/alluxio/master/journal/ufs/UfsJournalLogWriter.java b/core/server/common/src/main/java/alluxio/master/journal/ufs/UfsJournalLogWriter.java index fd132a2b488a..20a3aedfc6b6 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/ufs/UfsJournalLogWriter.java +++ b/core/server/common/src/main/java/alluxio/master/journal/ufs/UfsJournalLogWriter.java @@ -522,7 +522,7 @@ private void checkIsWritable() throws JournalClosedException { } } - private String currentLogName() { + String currentLogName() { if (mJournalOutputStream != null) { return mJournalOutputStream.currentLog().toString(); } diff --git a/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServer.java b/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServer.java index 3dfc965a26f2..35d0526ce6a1 100644 --- a/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServer.java +++ b/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServer.java @@ -16,7 +16,7 @@ import alluxio.grpc.GrpcServerAddress; import alluxio.grpc.GrpcServerBuilder; import alluxio.grpc.GrpcService; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.security.user.UserState; import io.grpc.ServerInterceptors; @@ -105,7 +105,7 @@ public synchronized CompletableFuture listen(InetSocketAddress address, .addService(new GrpcService(ServerInterceptors.intercept( new GrpcMessagingServiceClientHandler(address, listener::accept, threadContext, mExecutor, mConf.getMs(PropertyKey.MASTER_EMBEDDED_JOURNAL_MAX_ELECTION_TIMEOUT)), - new ClientIpAddressInjector()))) + new ClientContextServerInjector()))) .build(); try { diff --git a/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServiceClientHandler.java b/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServiceClientHandler.java index 108f984cb609..62df849e40a9 100644 --- a/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServiceClientHandler.java +++ b/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServiceClientHandler.java @@ -13,7 +13,7 @@ import alluxio.grpc.MessagingServiceGrpc; import alluxio.grpc.TransportMessage; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import com.google.common.base.MoreObjects; import io.grpc.stub.StreamObserver; @@ -74,7 +74,7 @@ public StreamObserver connect( // Transport level identifier for this connection. 
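Both transport classes swap ClientIpAddressInjector for ClientContextServerInjector while keeping the same static lookup style. For readers unfamiliar with the pattern, a rough sketch of such a context-capturing gRPC ServerInterceptor follows (hypothetical class, not the Alluxio implementation; production code must also account for calls that hop threads):

    import io.grpc.Grpc;
    import io.grpc.Metadata;
    import io.grpc.ServerCall;
    import io.grpc.ServerCallHandler;
    import io.grpc.ServerInterceptor;

    // Captures the caller's transport address so handlers can log it later.
    public class RemoteAddressInterceptor implements ServerInterceptor {
      private static final ThreadLocal<String> REMOTE_ADDRESS = new ThreadLocal<>();

      /** @return the address recorded for the current handling thread, or null */
      public static String getIpAddress() {
        return REMOTE_ADDRESS.get();
      }

      @Override
      public <ReqT, RespT> ServerCall.Listener<ReqT> interceptCall(
          ServerCall<ReqT, RespT> call, Metadata headers,
          ServerCallHandler<ReqT, RespT> next) {
        // TRANSPORT_ATTR_REMOTE_ADDR is populated by the gRPC transport per call.
        REMOTE_ADDRESS.set(String.valueOf(
            call.getAttributes().get(Grpc.TRANSPORT_ATTR_REMOTE_ADDR)));
        return next.startCall(call, headers);
      }
    }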
String transportId = MoreObjects.toStringHelper(this) .add("ServerAddress", mServerAddress) - .add("ClientAddress", ClientIpAddressInjector.getIpAddress()) + .add("ClientAddress", ClientContextServerInjector.getIpAddress()) .toString(); LOG.debug("Creating a messaging server connection: {}", transportId); diff --git a/core/server/common/src/main/java/alluxio/master/transport/Listeners.java b/core/server/common/src/main/java/alluxio/master/transport/Listeners.java index b01f6ad48536..225a0297f041 100644 --- a/core/server/common/src/main/java/alluxio/master/transport/Listeners.java +++ b/core/server/common/src/main/java/alluxio/master/transport/Listeners.java @@ -70,7 +70,7 @@ public CompletableFuture accept(T event) { listener.getListener().accept(event); } } - return CompletableFuture.allOf(futures.toArray(new CompletableFuture[futures.size()])); + return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])); } @Override diff --git a/core/server/common/src/main/java/alluxio/metrics/sink/MetricsServlet.java b/core/server/common/src/main/java/alluxio/metrics/sink/MetricsServlet.java index 6d67de1f4727..84298b54dce7 100644 --- a/core/server/common/src/main/java/alluxio/metrics/sink/MetricsServlet.java +++ b/core/server/common/src/main/java/alluxio/metrics/sink/MetricsServlet.java @@ -32,9 +32,10 @@ @NotThreadSafe public class MetricsServlet implements Sink { public static final String SERVLET_PATH = "/metrics/json"; + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .registerModule(new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, false)); private MetricRegistry mMetricsRegistry; - private ObjectMapper mObjectMapper; /** * Creates a new {@link MetricsServlet} with a {@link Properties} and {@link MetricRegistry}. 
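Hoisting the mapper into a static constant is safe because a Jackson ObjectMapper is thread-safe once configured, and it avoids re-registering MetricsModule for every servlet instance. A small demo under that assumption (hypothetical class name):

    import com.codahale.metrics.MetricRegistry;
    import com.codahale.metrics.json.MetricsModule;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import java.util.concurrent.TimeUnit;

    public class SharedMapperDemo {
      // Configured once; safe to share across requests and threads.
      public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
          .registerModule(new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, false));

      public static void main(String[] args) throws Exception {
        MetricRegistry registry = new MetricRegistry();
        registry.counter("requests").inc(3);
        // Serialization does not mutate the mapper, so concurrent writers are fine.
        System.out.println(
            OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(registry));
      }
    }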
@@ -43,9 +44,6 @@ public class MetricsServlet implements Sink { */ public MetricsServlet(MetricRegistry registry) { mMetricsRegistry = registry; - mObjectMapper = - new ObjectMapper().registerModule(new MetricsModule(TimeUnit.SECONDS, - TimeUnit.MILLISECONDS, false)); } private HttpServlet createServlet() { @@ -58,7 +56,7 @@ protected void doGet(HttpServletRequest request, HttpServletResponse response) response.setContentType("application/json"); response.setStatus(HttpServletResponse.SC_OK); response.setHeader("Cache-Control", "no-cache, no-store, must-revalidate"); - String result = mObjectMapper.writerWithDefaultPrettyPrinter() + String result = OBJECT_MAPPER.writerWithDefaultPrettyPrinter() .writeValueAsString(mMetricsRegistry); response.getWriter().println(result); } diff --git a/core/server/common/src/main/java/alluxio/underfs/AbstractUfsManager.java b/core/server/common/src/main/java/alluxio/underfs/AbstractUfsManager.java index 4de988ca2e1b..3478edfe5dab 100644 --- a/core/server/common/src/main/java/alluxio/underfs/AbstractUfsManager.java +++ b/core/server/common/src/main/java/alluxio/underfs/AbstractUfsManager.java @@ -160,19 +160,20 @@ private UnderFileSystem getOrAddWithRecorder(AlluxioURI ufsUri, if (useManagedBlocking) { fs = new ManagedBlockingUfsForwarder(fs); } - - if (mUnderFileSystemMap.putIfAbsent(key, fs) != null) { - // This shouldn't occur unless our synchronization is incorrect - LOG.warn("UFS already existed in UFS manager"); - } mCloser.register(fs); try { connectUfs(fs); - } catch (IOException e) { + tryUseFileSystem(fs, ufsUri.getPath()); + } catch (Exception e) { String message = String.format( "Failed to perform initial connect to UFS %s: %s", ufsUri, e); recorder.record(message); LOG.warn(message); + throw new RuntimeException(e); + } + if (mUnderFileSystemMap.putIfAbsent(key, fs) != null) { + // This shouldn't occur unless our synchronization is incorrect + LOG.warn("UFS already existed in UFS manager"); } return fs; } @@ -185,6 +186,17 @@ private UnderFileSystem getOrAddWithRecorder(AlluxioURI ufsUri, */ protected abstract void connectUfs(UnderFileSystem fs) throws IOException; + /** + * To check whether the filesystem is available by calling exists. + * + * @param fs the filesystem + * @param ufsPath the UFS path + * @throws Exception + */ + private void tryUseFileSystem(UnderFileSystem fs, String ufsPath) throws Exception { + fs.exists(ufsPath); + } + @Override public void addMount(long mountId, final AlluxioURI ufsUri, final UnderFileSystemConfiguration ufsConf) { diff --git a/core/server/common/src/main/java/alluxio/web/CORSFilter.java b/core/server/common/src/main/java/alluxio/web/CORSFilter.java new file mode 100644 index 000000000000..b3dbcd171ca8 --- /dev/null +++ b/core/server/common/src/main/java/alluxio/web/CORSFilter.java @@ -0,0 +1,56 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
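The reordering in AbstractUfsManager means a UFS instance is connected and probed with exists() before it is published via putIfAbsent, so other threads can never pick up a half-initialized filesystem, and a failed connect now surfaces as a RuntimeException instead of a cached broken entry. A simplified sketch of that validate-then-publish shape (types are stand-ins, not the Alluxio classes):

    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.ConcurrentMap;

    public class ValidateThenPublish<K, V> {
      private final ConcurrentMap<K, V> mMap = new ConcurrentHashMap<>();

      interface Validator<V> { void validate(V value) throws Exception; }

      V getOrAdd(K key, V candidate, Validator<V> validator) {
        V existing = mMap.get(key);
        if (existing != null) {
          return existing;
        }
        try {
          validator.validate(candidate); // connect + exists() probe happen here
        } catch (Exception e) {
          throw new RuntimeException(e); // surface the failure instead of caching it
        }
        // Publish only after validation; tolerate a concurrent winner.
        V raced = mMap.putIfAbsent(key, candidate);
        return raced == null ? candidate : raced;
      }
    }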
+ */ + +package alluxio.web; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; + +import org.apache.commons.lang3.StringUtils; + +import java.io.IOException; +import javax.servlet.FilterChain; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +/** + * A filter for adding the Cors header to the http header. + */ +public class CORSFilter extends HttpFilter { + @Override + public void doFilter(HttpServletRequest req, HttpServletResponse resp, FilterChain chain) + throws IOException, ServletException { + if (Configuration.getBoolean(PropertyKey.WEB_CORS_ENABLED)) { + String allowOrigins = Configuration.getString(PropertyKey.WEB_CORS_ALLOW_ORIGINS); + String allowMethods = Configuration.getString(PropertyKey.WEB_CORS_ALLOW_METHODS); + String allowHeaders = Configuration.getString(PropertyKey.WEB_CORS_ALLOW_HEADERS); + String exposeHeaders = Configuration.getString(PropertyKey.WEB_CORS_EXPOSED_HEADERS); + boolean allowCredential = Configuration.getBoolean( + PropertyKey.WEB_CORS_ALLOW_CREDENTIAL); + int maxAge = Configuration.getInt(PropertyKey.WEB_CORS_MAX_AGE); + + if (!StringUtils.equals(allowOrigins, "*")) { + resp.addHeader("Vary", "Origin"); + } + + resp.setHeader("Access-Control-Allow-Origin", allowOrigins); + resp.setHeader("Access-Control-Allow-Headers", allowHeaders); + resp.setHeader("Access-Control-Allow-Methods", allowMethods); + resp.setHeader("Access-Control-Max-Age", String.valueOf(maxAge)); + resp.setHeader("Access-Control-Expose-Headers", exposeHeaders); + if (allowCredential) { + resp.setHeader("Access-Control-Allow-Credentials", "true"); + } + } + chain.doFilter(req, resp); + } +} diff --git a/core/server/common/src/main/java/alluxio/web/HttpFilter.java b/core/server/common/src/main/java/alluxio/web/HttpFilter.java new file mode 100644 index 000000000000..b09ebaa89daf --- /dev/null +++ b/core/server/common/src/main/java/alluxio/web/HttpFilter.java @@ -0,0 +1,59 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.web; + +import java.io.IOException; +import javax.servlet.Filter; +import javax.servlet.FilterChain; +import javax.servlet.FilterConfig; +import javax.servlet.ServletException; +import javax.servlet.ServletRequest; +import javax.servlet.ServletResponse; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +/** + * Provides an abstract class to be subclassed to create an HTTP filter. 
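To make the CORSFilter cases easy to eyeball, here is the header set it emits reduced to a pure function; the property values are hard-coded assumptions in this sketch, whereas the real filter reads them from Alluxio's Configuration:

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class CorsHeadersSketch {
      static Map<String, String> corsHeaders(String allowOrigins, boolean allowCredential) {
        Map<String, String> h = new LinkedHashMap<>();
        if (!"*".equals(allowOrigins)) {
          h.put("Vary", "Origin"); // response differs per origin, so caches must key on it
        }
        h.put("Access-Control-Allow-Origin", allowOrigins);
        h.put("Access-Control-Allow-Headers", "*");          // assumed property value
        h.put("Access-Control-Allow-Methods", "GET,POST,OPTIONS"); // assumed property value
        h.put("Access-Control-Max-Age", "600");              // assumed property value
        h.put("Access-Control-Expose-Headers", "*");         // assumed property value
        if (allowCredential) {
          h.put("Access-Control-Allow-Credentials", "true");
        }
        return h;
      }

      public static void main(String[] args) {
        corsHeaders("https://example.com", true)
            .forEach((k, v) -> System.out.println(k + ": " + v));
      }
    }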
+ */ +public abstract class HttpFilter implements Filter { + @Override + public void init(FilterConfig filterConfig) throws ServletException { + } + + @Override + public void destroy() { + } + + @Override + public final void doFilter(ServletRequest req, ServletResponse resp, FilterChain chain) + throws IOException, ServletException { + if (!(req instanceof HttpServletRequest && resp instanceof HttpServletResponse)) { + throw new ServletException( + String.format("Received non-HTTP request or response: req=%s, resp=%s", + req.getClass(), resp.getClass())); + } + + HttpServletRequest request = (HttpServletRequest) req; + HttpServletResponse response = (HttpServletResponse) resp; + + doFilter(request, response, chain); + } + + /** + * Receives standard HTTP requests from the public doFilter method. + * @param req http request + * @param resp http response + * @param chain filter chain + */ + public abstract void doFilter(HttpServletRequest req, HttpServletResponse resp, FilterChain chain) + throws IOException, ServletException; +} diff --git a/core/server/common/src/main/java/alluxio/web/WebServer.java b/core/server/common/src/main/java/alluxio/web/WebServer.java index 4a5527dc41bb..c7dd6f221ecf 100644 --- a/core/server/common/src/main/java/alluxio/web/WebServer.java +++ b/core/server/common/src/main/java/alluxio/web/WebServer.java @@ -38,7 +38,9 @@ import java.io.IOException; import java.net.InetSocketAddress; +import java.util.EnumSet; import javax.annotation.concurrent.NotThreadSafe; +import javax.servlet.DispatcherType; /** * Class that bootstraps and starts a web server. @@ -60,6 +62,10 @@ public abstract class WebServer { private final PrometheusMetricsServlet mPMetricsServlet = new PrometheusMetricsServlet( MetricsSystem.METRIC_REGISTRY); + protected ServerConnector getServerConnector() { + return mServerConnector; + } + /** * Creates a new instance of {@link WebServer}. It pairs URLs with servlets and sets the webapp * folder. 
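A hypothetical subclass illustrates the extension point HttpFilter provides: the base class performs the ServletRequest-to-HttpServletRequest downcast once, so concrete filters such as CORSFilter work directly with the HTTP types:

    import alluxio.web.HttpFilter;

    import java.io.IOException;
    import javax.servlet.FilterChain;
    import javax.servlet.ServletException;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;

    // Hypothetical example filter, not part of the patch.
    public class ServerHeaderFilter extends HttpFilter {
      @Override
      public void doFilter(HttpServletRequest req, HttpServletResponse resp, FilterChain chain)
          throws IOException, ServletException {
        resp.setHeader("Server", "alluxio-web"); // illustrative header only
        chain.doFilter(req, resp); // always continue the chain
      }
    }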
@@ -112,6 +118,9 @@ public WebServer(String serviceName, InetSocketAddress address) { } mServletContextHandler.addServlet(StacksServlet.class, THREAD_DUMP_PATH); mServletContextHandler.addServlet(JmxServlet.class, JMX_PATH); + mServletContextHandler.addFilter(CORSFilter.class, "/*", + EnumSet.of(DispatcherType.REQUEST, DispatcherType.FORWARD, DispatcherType.INCLUDE, + DispatcherType.ASYNC, DispatcherType.ERROR)); HandlerList handlers = new HandlerList(); handlers.setHandlers(new Handler[] {mMetricsServlet.getHandler(), mPMetricsServlet.getHandler(), mServletContextHandler, new DefaultHandler()}); diff --git a/core/server/common/src/test/java/alluxio/master/StateLockManagerTest.java b/core/server/common/src/test/java/alluxio/master/StateLockManagerTest.java index 50fa62984b41..0494fa50f0a1 100644 --- a/core/server/common/src/test/java/alluxio/master/StateLockManagerTest.java +++ b/core/server/common/src/test/java/alluxio/master/StateLockManagerTest.java @@ -18,7 +18,6 @@ import alluxio.conf.PropertyKey; import alluxio.resource.LockResource; import alluxio.util.CommonUtils; -import alluxio.util.ThreadUtils; import com.google.common.util.concurrent.SettableFuture; import org.junit.Assert; @@ -26,7 +25,7 @@ import org.junit.Test; import org.junit.rules.ExpectedException; -import java.util.List; +import java.util.Collection; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeoutException; import java.util.concurrent.locks.Lock; @@ -140,6 +139,7 @@ public void testExclusiveOnlyMode() throws Throwable { } @Test + // TODO(jiacheng): run this test before committing public void testGetStateLockSharedWaitersAndHolders() throws Throwable { final StateLockManager stateLockManager = new StateLockManager(); @@ -149,10 +149,11 @@ public void testGetStateLockSharedWaitersAndHolders() throws Throwable { StateLockingThread sharedHolderThread = new StateLockingThread(stateLockManager, false); sharedHolderThread.start(); sharedHolderThread.waitUntilStateLockAcquired(); - final List sharedWaitersAndHolders = stateLockManager.getSharedWaitersAndHolders(); + final Collection sharedWaitersAndHolders = + stateLockManager.getSharedWaitersAndHolders(); assertEquals(i, sharedWaitersAndHolders.size()); assertTrue(sharedWaitersAndHolders.contains( - ThreadUtils.getThreadIdentifier(sharedHolderThread))); + sharedHolderThread.getName())); } } diff --git a/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java b/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java index 83e9cfaf6e1c..3148b3239685 100644 --- a/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java +++ b/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java @@ -98,7 +98,13 @@ public class JournalEntryAssociationTest { JournalEntry.newBuilder().setUpdateInodeFile(UpdateInodeFileEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setAddTransformJobInfo(Table.AddTransformJobInfoEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setRemoveTransformJobInfo(Table.RemoveTransformJobInfoEntry.getDefaultInstance()).build(), - JournalEntry.newBuilder().setCompleteTransformTable(Table.CompleteTransformTableEntry.getDefaultInstance()).build() + JournalEntry.newBuilder().setCompleteTransformTable(Table.CompleteTransformTableEntry.getDefaultInstance()).build(), + JournalEntry.newBuilder().setLoadJob(alluxio.proto.journal.Job.LoadJobEntry.newBuilder() + 
.setLoadPath("/test").setState(alluxio.proto.journal.Job.PJobState.CREATED) + .setBandwidth(1).setPartialListing(false).setVerify(true).setJobId("1").build()).build(), + JournalEntry.newBuilder().setCopyJob(alluxio.proto.journal.Job.CopyJobEntry.newBuilder() + .setSrc("/src").setDst("/dst").setState(alluxio.proto.journal.Job.PJobState.CREATED) + .setBandwidth(1).setPartialListing(false).setVerify(true).setJobId("2").build()).build() ); // CHECKSTYLE.OFF: LineLengthExceed diff --git a/core/server/common/src/test/java/alluxio/master/journal/JournalUtilsTest.java b/core/server/common/src/test/java/alluxio/master/journal/JournalUtilsTest.java index bc211e90bf69..50a42359e9d3 100644 --- a/core/server/common/src/test/java/alluxio/master/journal/JournalUtilsTest.java +++ b/core/server/common/src/test/java/alluxio/master/journal/JournalUtilsTest.java @@ -12,22 +12,29 @@ package alluxio.master.journal; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import alluxio.master.journal.checkpoint.CheckpointInputStream; import alluxio.master.journal.checkpoint.CheckpointName; import alluxio.master.journal.checkpoint.CheckpointOutputStream; import alluxio.master.journal.checkpoint.CheckpointType; +import alluxio.master.journal.checkpoint.Checkpointed; import alluxio.proto.journal.File.AddMountPointEntry; import alluxio.proto.journal.Journal.JournalEntry; import alluxio.resource.CloseableIterator; +import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -39,6 +46,9 @@ public final class JournalUtilsTest { @Rule public ExpectedException mThrown = ExpectedException.none(); + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + @Test public void checkpointAndRestore() throws IOException, InterruptedException { Journaled journaled = new TestJournaled(0); @@ -75,6 +85,92 @@ public void checkpointAndRestoreComponents() throws Exception { components.forEach(c -> assertEquals(1, c.getNumEntriesProcessed())); } + @Test + public void noEntryTest() throws IOException, InterruptedException { + testEntries(0); + } + + @Test + public void oneEntryTest() throws IOException, InterruptedException { + testEntries(1); + } + + @Test + public void multiEntryTest() throws IOException, InterruptedException { + testEntries(5); + } + + private void testEntries(int numEntries) throws IOException, InterruptedException { + TestMultiEntryJournaled journaled = createJournaled(numEntries, 0L); + ArrayList copy = new ArrayList<>(journaled.mProcessedEntries); + File file = mFolder.newFile(); + try (OutputStream outputStream = Files.newOutputStream(file.toPath())) { + JournalUtils.writeJournalEntryCheckpoint(outputStream, journaled); + } + journaled.resetState(); + try (CheckpointInputStream inputStream = + new CheckpointInputStream(Files.newInputStream(file.toPath()))) { + JournalUtils.restoreJournalEntryCheckpoint(inputStream, journaled); + } + Assert.assertEquals(copy, journaled.mProcessedEntries); + } + + @Test + public void testCompoundNone() throws IOException, InterruptedException { + testCompound(0); + } + + @Test + public void testCompoundOne() throws IOException, InterruptedException { + testCompound(1); + } + + @Test + 
public void testCompoundMulti() throws IOException, InterruptedException { + testCompound(5); + } + + private void testCompound(int numElements) throws IOException, InterruptedException { + List checkpointed = new ArrayList<>(numElements); + int numEntries = 5; + long sequenceNumber = 0; + for (int i = 0; i < numElements; i++) { + if (i % 2 == 0) { + checkpointed.add(createJournaled(numEntries, sequenceNumber)); + } else { + checkpointed.add(new TestCheckpointed(numEntries, sequenceNumber)); + } + sequenceNumber += numEntries; + } + + ArrayList copy = new ArrayList<>(checkpointed); + File file = mFolder.newFile(); + try (OutputStream outputStream = Files.newOutputStream(file.toPath())) { + JournalUtils.writeToCheckpoint(outputStream, checkpointed); + } + for (Checkpointed c : checkpointed) { + if (c instanceof Journaled) { + ((Journaled) c).resetState(); + } else if (c instanceof TestCheckpointed) { + ((TestCheckpointed) c).clear(); + } + } + try (CheckpointInputStream inputStream = + new CheckpointInputStream(Files.newInputStream(file.toPath()))) { + JournalUtils.restoreFromCheckpoint(inputStream, checkpointed); + } + assertEquals(copy, checkpointed); + } + + private TestMultiEntryJournaled createJournaled(int numEntries, long baseSequenceNumber) { + TestMultiEntryJournaled journaled = new TestMultiEntryJournaled(); + for (int i = 0; i < numEntries; i++) { + journaled.processJournalEntry( + JournalEntry.newBuilder().setSequenceNumber(baseSequenceNumber + i).build()); + } + return journaled; + } + private static class TestJournaled implements Journaled { private final CheckpointName mName; private int mNumEntriesProcessed; @@ -108,4 +204,101 @@ public CheckpointName getCheckpointName() { return mName; } } + + private static class TestMultiEntryJournaled implements Journaled { + private static int sIndex = 0; + private final CheckpointName mName; + private final List mProcessedEntries = new ArrayList<>(); + + TestMultiEntryJournaled() { + mName = CheckpointName.values()[sIndex]; + sIndex = (sIndex + 1); + assertTrue("Cannot create too many Journaled instances", + sIndex <= CheckpointName.values().length); + } + + @Override + public CloseableIterator getJournalEntryIterator() { + return CloseableIterator.noopCloseable(mProcessedEntries.iterator()); + } + + @Override + public boolean processJournalEntry(JournalEntry entry) { + return mProcessedEntries.add(entry); + } + + @Override + public void resetState() { + mProcessedEntries.clear(); + } + + @Override + public CheckpointName getCheckpointName() { + return mName; + } + + @Override + public int hashCode() { + return super.hashCode(); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof TestMultiEntryJournaled + && mProcessedEntries.equals(((TestMultiEntryJournaled) obj).mProcessedEntries); + } + } + + private static class TestCheckpointed implements Checkpointed { + private static long sLong = 0L; + private final CheckpointName mName; + private final int mSize; + private final List mState = new ArrayList<>(); + + TestCheckpointed(int numLongs, long baseLong) { + mName = new TestMultiEntryJournaled().getCheckpointName(); + mSize = numLongs; + for (int i = 0; i < mSize; i++) { + mState.add(baseLong + i); + } + } + + public void clear() { + mState.clear(); + } + + @Override + public CheckpointName getCheckpointName() { + return mName; + } + + @Override + public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { + CheckpointOutputStream outputStream = new 
CheckpointOutputStream(output, + CheckpointType.LONGS); + for (Long l : mState) { + outputStream.writeLong(l); + } + } + + @Override + public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { + assertEquals(CheckpointType.LONGS, input.getType()); + for (int i = 0; i < mSize; i++) { + long l = input.readLong(); + mState.add(l); + } + } + + @Override + public int hashCode() { + return super.hashCode(); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof TestCheckpointed + && mState.equals(((TestCheckpointed) obj).mState); + } + } } diff --git a/core/server/common/src/test/java/alluxio/master/journal/checkpoint/CheckpointStreamTest.java b/core/server/common/src/test/java/alluxio/master/journal/checkpoint/CheckpointStreamTest.java new file mode 100644 index 000000000000..4ee717313b6e --- /dev/null +++ b/core/server/common/src/test/java/alluxio/master/journal/checkpoint/CheckpointStreamTest.java @@ -0,0 +1,79 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal.checkpoint; + +import net.bytebuddy.utility.RandomString; +import org.apache.ratis.io.MD5Hash; +import org.apache.ratis.util.MD5FileUtil; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.security.MessageDigest; +import java.util.Arrays; +import java.util.Collection; + +@RunWith(Parameterized.class) +public class CheckpointStreamTest { + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(CheckpointType.values()); + } + + @Parameterized.Parameter + public CheckpointType mType; + + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + @Test + public void regularStreamTest() throws IOException { + File file = mFolder.newFile(); + byte[] contents = RandomString.make().getBytes(); + try (CheckpointOutputStream outputStream = + new CheckpointOutputStream(Files.newOutputStream(file.toPath()), mType)) { + outputStream.write(contents); + } + byte[] retrieved = new byte[contents.length]; + try (CheckpointInputStream s = new CheckpointInputStream(Files.newInputStream(file.toPath()))) { + Assert.assertEquals(mType, s.getType()); + s.read(retrieved); + } + Assert.assertArrayEquals(contents, retrieved); + } + + @Test + public void optimizedStreamTest() throws IOException { + File file = mFolder.newFile(); + MessageDigest md5Out = MD5Hash.getDigester(); + byte[] contents = RandomString.make().getBytes(); + try (CheckpointOutputStream outputStream = + new CheckpointOutputStream(new OptimizedCheckpointOutputStream(file, md5Out), mType)) { + outputStream.write(contents); + } + MD5FileUtil.saveMD5File(file, new MD5Hash(md5Out.digest())); + MessageDigest md5In = MD5Hash.getDigester(); + byte[] retrieved = new byte[contents.length]; + try (CheckpointInputStream s = new OptimizedCheckpointInputStream(file, md5In)) { + 
Assert.assertEquals(mType, s.getType()); + s.read(retrieved); + } + MD5FileUtil.verifySavedMD5(file, new MD5Hash(md5In.digest())); + Assert.assertArrayEquals(contents, retrieved); + } +} diff --git a/core/server/common/src/test/java/alluxio/master/journal/raft/RaftSnapshotManagerTest.java b/core/server/common/src/test/java/alluxio/master/journal/raft/RaftSnapshotManagerTest.java new file mode 100644 index 000000000000..e25ad967cf47 --- /dev/null +++ b/core/server/common/src/test/java/alluxio/master/journal/raft/RaftSnapshotManagerTest.java @@ -0,0 +1,274 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal.raft; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.GrpcServer; +import alluxio.grpc.GrpcServerAddress; +import alluxio.grpc.GrpcServerBuilder; +import alluxio.grpc.GrpcService; +import alluxio.grpc.ServiceType; + +import net.bytebuddy.utility.RandomString; +import org.apache.commons.io.FileUtils; +import org.apache.ratis.io.MD5Hash; +import org.apache.ratis.server.RaftServerConfigKeys; +import org.apache.ratis.server.raftlog.RaftLog; +import org.apache.ratis.server.storage.RaftStorage; +import org.apache.ratis.server.storage.RaftStorageImpl; +import org.apache.ratis.server.storage.StorageImplUtils; +import org.apache.ratis.statemachine.StateMachineStorage; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; +import org.apache.ratis.util.MD5FileUtil; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.ServerSocket; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.Executors; +import java.util.stream.Collectors; + +public class RaftSnapshotManagerTest { + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + private final List mGrpcServers = new ArrayList<>(); + private final List mSmStorages = new ArrayList<>(); + private final List mManagers = new ArrayList<>(); + + @Before + public void before() throws IOException { + Configuration.set(PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT, "10ms"); + // create Raft Storages and grpc servers for all masters + // no need to create full master processes + for (int i = 0; i < 3; i++) { + // create the state machine storage and initalize it using the raft storage + SnapshotDirStateMachineStorage smStorage = createStateMachineStorage(mFolder); + mSmStorages.add(smStorage); + RaftJournalServiceHandler handler = new RaftJournalServiceHandler(smStorage); + // create and start a grpc server for each on a random available port + GrpcServer server = createGrpcServer(handler); + server.start(); + mGrpcServers.add(server); + } + // create snapshot managers based on the ports being used by the servers + String 
hostAddress = InetAddress.getLocalHost().getHostAddress(); + String rpcAddresses = mGrpcServers.stream() + .map(server -> String.format("%s:%d", hostAddress, server.getBindPort())) + .collect(Collectors.joining(",")); + Configuration.set(PropertyKey.MASTER_RPC_ADDRESSES, rpcAddresses); + // create SnapshotDownloaders after the fact: this is because the downloaders cache their + // grpc clients to reuse them efficiently. They create the clients based on the configured + // rpc addresses, excluding their own. + for (int i = 0; i < mGrpcServers.size(); i++) { + Configuration.set(PropertyKey.MASTER_RPC_PORT, mGrpcServers.get(i).getBindPort()); + mManagers.add(new RaftSnapshotManager(mSmStorages.get(i), + Executors.newSingleThreadExecutor())); + } + } + + @After + public void after() throws IOException { + mGrpcServers.forEach(GrpcServer::shutdown); + mGrpcServers.forEach(GrpcServer::awaitTermination); + } + + @Test + public void noneAvailable() { + mManagers.get(0).downloadSnapshotFromOtherMasters(); + long l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(RaftLog.INVALID_LOG_INDEX, l); + } + + @Test + public void simple() throws IOException { + createSampleSnapshot(mSmStorages.get(1), 1, 10); + mSmStorages.get(1).loadLatestSnapshot(); + + mManagers.get(0).downloadSnapshotFromOtherMasters(); + long l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(10, l); + File snapshotDir1 = mSmStorages.get(1).getSnapshotDir(); + File snapshotDir0 = mSmStorages.get(0).getSnapshotDir(); + Assert.assertTrue(directoriesEqual(snapshotDir0, snapshotDir1)); + } + + @Test + public void oneUnavailable() throws IOException { + mGrpcServers.get(2).shutdown(); + mGrpcServers.get(2).awaitTermination(); + + createSampleSnapshot(mSmStorages.get(1), 1, 10); + mSmStorages.get(1).loadLatestSnapshot(); + + mManagers.get(0).downloadSnapshotFromOtherMasters(); + long l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(10, l); + File snapshotDir1 = mSmStorages.get(1).getSnapshotDir(); + File snapshotDir0 = mSmStorages.get(0).getSnapshotDir(); + Assert.assertTrue(directoriesEqual(snapshotDir0, snapshotDir1)); + } + + @Test + public void downloadHigherOne() throws IOException { + createSampleSnapshot(mSmStorages.get(1), 1, 10); + mSmStorages.get(1).loadLatestSnapshot(); + createSampleSnapshot(mSmStorages.get(2), 1, 100); + mSmStorages.get(2).loadLatestSnapshot(); + + mManagers.get(0).downloadSnapshotFromOtherMasters(); + long l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(100, l); + File snapshotDir2 = mSmStorages.get(2).getSnapshotDir(); + File snapshotDir1 = mSmStorages.get(1).getSnapshotDir(); + File snapshotDir0 = mSmStorages.get(0).getSnapshotDir(); + Assert.assertTrue(directoriesEqual(snapshotDir0, snapshotDir2)); + Assert.assertFalse(directoriesEqual(snapshotDir1, snapshotDir0)); + Assert.assertFalse(directoriesEqual(snapshotDir1, snapshotDir2)); + } + + @Test + public void higherOneUnavailable() throws IOException { + createSampleSnapshot(mSmStorages.get(1), 1, 10); + createSampleSnapshot(mSmStorages.get(2), 1, 100); + mSmStorages.get(1).loadLatestSnapshot(); + mSmStorages.get(2).loadLatestSnapshot(); + mGrpcServers.get(2).shutdown(); + mGrpcServers.get(2).awaitTermination(); + + mManagers.get(0).downloadSnapshotFromOtherMasters(); + long l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(10, l); + File snapshotDir2 = mSmStorages.get(2).getSnapshotDir(); + File snapshotDir1 = mSmStorages.get(1).getSnapshotDir(); + File 
snapshotDir0 = mSmStorages.get(0).getSnapshotDir(); + Assert.assertTrue(directoriesEqual(snapshotDir0, snapshotDir1)); + Assert.assertFalse(directoriesEqual(snapshotDir2, snapshotDir0)); + Assert.assertFalse(directoriesEqual(snapshotDir2, snapshotDir1)); + } + + @Test + public void successThenFailureThenSuccess() throws IOException { + // eliminate one of the two servers + mGrpcServers.get(2).shutdown(); + mGrpcServers.get(2).awaitTermination(); + + createSampleSnapshot(mSmStorages.get(1), 1, 10); + mSmStorages.get(1).loadLatestSnapshot(); + mManagers.get(0).downloadSnapshotFromOtherMasters(); + long l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(10, l); + File snapshotDir1 = mSmStorages.get(1).getSnapshotDir(); + File snapshotDir0 = mSmStorages.get(0).getSnapshotDir(); + Assert.assertTrue(directoriesEqual(snapshotDir0, snapshotDir1)); + + createSampleSnapshot(mSmStorages.get(1), 2, 100); + mSmStorages.get(1).loadLatestSnapshot(); + int bindPort = mGrpcServers.get(1).getBindPort(); + mGrpcServers.get(1).shutdown(); + mGrpcServers.get(1).awaitTermination(); + mManagers.get(0).downloadSnapshotFromOtherMasters(); + l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(-1, l); // failure expected + + // recreate grpc server on the same port + mGrpcServers.add(1, + createGrpcServer(new RaftJournalServiceHandler(mSmStorages.get(1)), bindPort)); + mGrpcServers.get(1).start(); + createSampleSnapshot(mSmStorages.get(1), 3, 1_000); + mSmStorages.get(1).loadLatestSnapshot(); + mManagers.get(0).downloadSnapshotFromOtherMasters(); + l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(1_000, l); + // server 1 has more snapshots than server 0 + Assert.assertFalse(directoriesEqual(snapshotDir0, snapshotDir1)); + } + + public static SnapshotDirStateMachineStorage createStateMachineStorage(TemporaryFolder folder) + throws IOException { + RaftStorageImpl raftStorage = StorageImplUtils.newRaftStorage(folder.newFolder(), + RaftServerConfigKeys.Log.CorruptionPolicy.EXCEPTION, RaftStorage.StartupOption.RECOVER, + RaftServerConfigKeys.STORAGE_FREE_SPACE_MIN_DEFAULT.getSize()); + raftStorage.initialize(); + SnapshotDirStateMachineStorage smStorage = new SnapshotDirStateMachineStorage(); + smStorage.init(raftStorage); + return smStorage; + } + + public static GrpcServer createGrpcServer(RaftJournalServiceHandler handler) throws IOException { + return createGrpcServer(handler, 0); + } + + public static GrpcServer createGrpcServer(RaftJournalServiceHandler handler, int port) + throws IOException { + try (ServerSocket socket = new ServerSocket(port)) { + InetSocketAddress address = new InetSocketAddress(socket.getLocalPort()); + return GrpcServerBuilder.forAddress( + GrpcServerAddress.create(address.getHostName(), address), + Configuration.global()) + .addService(ServiceType.RAFT_JOURNAL_SERVICE, new GrpcService(handler)) + .build(); + } + } + + public static void createSampleSnapshot(StateMachineStorage smStorage, long term, long index) + throws IOException { + String snapshotDirName = SimpleStateMachineStorage.getSnapshotFileName(term, index); + File dir = new File(smStorage.getSnapshotDir(), snapshotDirName); + if (!dir.exists() && !dir.mkdirs()) { + throw new IOException(String.format("Unable to create directory %s", dir)); + } + for (int i = 0; i < 10; i++) { + String s = "dummy-file-" + i; + File file = new File(dir, s); + try (FileOutputStream outputStream = new FileOutputStream(file)) { + outputStream.write(RandomString.make().getBytes()); + } + 
MD5Hash md5Hash = MD5FileUtil.computeMd5ForFile(file); + MD5FileUtil.saveMD5File(file, md5Hash); + } + } + + public static boolean directoriesEqual(File dir1, File dir2) throws IOException { + if (!dir1.getName().equals(dir2.getName())) { + return false; + } + List files1 = new ArrayList<>(FileUtils.listFiles(dir1, null, true)); + List files2 = new ArrayList<>(FileUtils.listFiles(dir2, null, true)); + if (files1.size() != files2.size()) { + return false; + } + for (File file1 : files1) { + Path relativize1 = dir1.toPath().relativize(file1.toPath()); + Optional optionalFile = files2.stream() + .filter(file -> dir2.toPath().relativize(file.toPath()).equals(relativize1)) + .findFirst(); + if (!optionalFile.isPresent() || !FileUtils.contentEquals(file1, optionalFile.get())) { + return false; + } + } + return true; + } +} diff --git a/core/server/common/src/test/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorageTest.java b/core/server/common/src/test/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorageTest.java new file mode 100644 index 000000000000..c60d06da378b --- /dev/null +++ b/core/server/common/src/test/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorageTest.java @@ -0,0 +1,167 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal.raft; + +import static alluxio.master.journal.raft.RaftSnapshotManagerTest.createSampleSnapshot; +import static alluxio.master.journal.raft.RaftSnapshotManagerTest.createStateMachineStorage; + +import net.bytebuddy.utility.RandomString; +import org.apache.ratis.server.protocol.TermIndex; +import org.apache.ratis.statemachine.SnapshotInfo; +import org.apache.ratis.statemachine.SnapshotRetentionPolicy; +import org.apache.ratis.statemachine.impl.FileListSnapshotInfo; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; +import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.stream.Stream; + +public class SnapshotDirStateMachineStorageTest { + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + final SnapshotRetentionPolicy mRetentionPolicy = new SnapshotRetentionPolicy() { + @Override + public int getNumSnapshotsRetained() { + return 1; // keep only 1 snapshot + } + }; + SnapshotDirStateMachineStorage mStateMachineStorage; + + @Before + public void before() throws IOException { + mStateMachineStorage = createStateMachineStorage(mFolder); + } + + @Test + public void noSnapshot() { + Assert.assertNull(mStateMachineStorage.getLatestSnapshot()); + } + + @Test + public void onlyUpdateOnLoad() throws IOException { + Assert.assertNull(mStateMachineStorage.getLatestSnapshot()); + createSampleSnapshot(mStateMachineStorage, 1, 10); + // still null until new information is loaded + 
Assert.assertNull(mStateMachineStorage.getLatestSnapshot()); + } + + @Test + public void singleSnapshot() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 10); + mStateMachineStorage.loadLatestSnapshot(); + SnapshotInfo latestSnapshot = mStateMachineStorage.getLatestSnapshot(); + Assert.assertTrue(latestSnapshot instanceof FileListSnapshotInfo); + Assert.assertEquals(TermIndex.valueOf(1, 10), latestSnapshot.getTermIndex()); + } + + @Test + public void newerIndex() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 10); + mStateMachineStorage.loadLatestSnapshot(); + Assert.assertEquals(TermIndex.valueOf(1, 10), + mStateMachineStorage.getLatestSnapshot().getTermIndex()); + createSampleSnapshot(mStateMachineStorage, 1, 15); + mStateMachineStorage.loadLatestSnapshot(); + Assert.assertEquals(TermIndex.valueOf(1, 15), + mStateMachineStorage.getLatestSnapshot().getTermIndex()); + } + + @Test + public void newerTerm() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 10); + mStateMachineStorage.loadLatestSnapshot(); + Assert.assertEquals(TermIndex.valueOf(1, 10), + mStateMachineStorage.getLatestSnapshot().getTermIndex()); + createSampleSnapshot(mStateMachineStorage, 2, 5); + mStateMachineStorage.loadLatestSnapshot(); + Assert.assertEquals(TermIndex.valueOf(2, 5), + mStateMachineStorage.getLatestSnapshot().getTermIndex()); + } + + @Test + public void noDeletionUnlessSignaled() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 1); + createSampleSnapshot(mStateMachineStorage, 2, 10); + createSampleSnapshot(mStateMachineStorage, 3, 100); + + mStateMachineStorage.loadLatestSnapshot(); + mStateMachineStorage.cleanupOldSnapshots(mRetentionPolicy); + // no deletion unless signaled + try (Stream s = Files.list(mStateMachineStorage.getSnapshotDir().toPath())) { + Assert.assertEquals(3, s.count()); + } + } + + @Test + public void noopDeleteIfEmpty() throws IOException { + mStateMachineStorage.loadLatestSnapshot(); + mStateMachineStorage.signalNewSnapshot(); + mStateMachineStorage.cleanupOldSnapshots(mRetentionPolicy); + try (Stream s = Files.list(mStateMachineStorage.getSnapshotDir().toPath())) { + Assert.assertEquals(0, s.count()); + } + } + + @Test + public void noopDeleteIfOneOnly() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 10); + + mStateMachineStorage.loadLatestSnapshot(); + mStateMachineStorage.signalNewSnapshot(); + mStateMachineStorage.cleanupOldSnapshots(mRetentionPolicy); + // no deletion unless signaled + try (Stream s = Files.list(mStateMachineStorage.getSnapshotDir().toPath())) { + Assert.assertEquals(1, s.count()); + } + } + + @Test + public void deleteMultiple() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 1); + createSampleSnapshot(mStateMachineStorage, 2, 10); + createSampleSnapshot(mStateMachineStorage, 3, 100); + + mStateMachineStorage.signalNewSnapshot(); + mStateMachineStorage.cleanupOldSnapshots(mRetentionPolicy); + // no deletion unless signaled + try (Stream s = Files.list(mStateMachineStorage.getSnapshotDir().toPath())) { + Assert.assertEquals(1, s.count()); + } + mStateMachineStorage.loadLatestSnapshot(); + Assert.assertEquals(TermIndex.valueOf(3, 100), + mStateMachineStorage.getLatestSnapshot().getTermIndex()); + } + + @Test + public void backwardsCompatible() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 1); + String snapshotFile = SimpleStateMachineStorage.getSnapshotFileName(2, 10); + try (FileOutputStream outputStream = + new 
FileOutputStream(new File(mStateMachineStorage.getSnapshotDir(), snapshotFile))) { + outputStream.write(RandomString.make().getBytes()); + } + mStateMachineStorage.loadLatestSnapshot(); + SnapshotInfo latestSnapshot = mStateMachineStorage.getLatestSnapshot(); + Assert.assertTrue(latestSnapshot instanceof SingleFileSnapshotInfo); + Assert.assertEquals(TermIndex.valueOf(2, 10), latestSnapshot.getTermIndex()); + } +} diff --git a/core/server/common/src/test/java/alluxio/master/transport/GrpcMessagingTransportTest.java b/core/server/common/src/test/java/alluxio/master/transport/GrpcMessagingTransportTest.java index b08e8aec7186..2bfee6a1f9b3 100644 --- a/core/server/common/src/test/java/alluxio/master/transport/GrpcMessagingTransportTest.java +++ b/core/server/common/src/test/java/alluxio/master/transport/GrpcMessagingTransportTest.java @@ -18,6 +18,7 @@ import io.atomix.catalyst.buffer.BufferOutput; import io.atomix.catalyst.serializer.CatalystSerializable; import io.atomix.catalyst.serializer.Serializer; +import io.grpc.StatusRuntimeException; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -150,7 +151,8 @@ public void testServerClosed() throws Exception { try { sendRequest(clientConnection, new DummyRequest("dummy")).get(); } catch (ExecutionException e) { - Assert.assertTrue(e.getCause() instanceof IllegalStateException); + Assert.assertTrue(e.getCause() instanceof IllegalStateException + || e.getCause() instanceof StatusRuntimeException); failed = true; } Assert.assertTrue(failed); diff --git a/core/server/common/src/test/java/alluxio/util/ParallelZipUtilsTest.java b/core/server/common/src/test/java/alluxio/util/compression/DirectoryMarshallerTest.java similarity index 64% rename from core/server/common/src/test/java/alluxio/util/ParallelZipUtilsTest.java rename to core/server/common/src/test/java/alluxio/util/compression/DirectoryMarshallerTest.java index 2c51edbddadb..bbc97f5c7ca2 100644 --- a/core/server/common/src/test/java/alluxio/util/ParallelZipUtilsTest.java +++ b/core/server/common/src/test/java/alluxio/util/compression/DirectoryMarshallerTest.java @@ -9,20 +9,33 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.util; +package alluxio.util.compression; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; -import java.io.FileOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collection; + +@RunWith(Parameterized.class) +public class DirectoryMarshallerTest { + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new NoCompressionMarshaller(), + new GzipMarshaller(), + new TarGzMarshaller()); + } + + @Parameterized.Parameter + public DirectoryMarshaller mMarshaller; -/** - * Units tests for {@link ParallelZipUtils}. 
- */ -public final class ParallelZipUtilsTest { @Rule public TemporaryFolder mFolder = new TemporaryFolder(); @@ -30,7 +43,7 @@ public final class ParallelZipUtilsTest { public void emptyDir() throws Exception { Path empty = mFolder.newFolder("emptyDir").toPath(); - zipUnzipTest(empty); + tarUntarTest(empty); } @Test @@ -39,7 +52,7 @@ public void oneFileDir() throws Exception { Path file = dir.resolve("file"); Files.write(file, "test content".getBytes()); - zipUnzipTest(dir); + tarUntarTest(dir); } @Test @@ -47,10 +60,10 @@ public void tenFileDir() throws Exception { Path dir = mFolder.newFolder("tenFileDir").toPath(); for (int i = 0; i < 10; i++) { Path file = dir.resolve("file" + i); - Files.write(file, ("test content and a lot of test content" + i).getBytes()); + Files.write(file, ("test content" + i).getBytes()); } - zipUnzipTest(dir); + tarUntarTest(dir); } @Test @@ -59,7 +72,7 @@ public void emptySubDir() throws Exception { Path subDir = dir.resolve("subDir"); Files.createDirectory(subDir); - zipUnzipTest(dir); + tarUntarTest(dir); } @Test @@ -74,18 +87,15 @@ public void nested() throws Exception { Path file = current.resolve("file"); Files.write(file, "hello world".getBytes()); - zipUnzipTest(dir); + tarUntarTest(dir); } - private void zipUnzipTest(Path path) throws Exception { - String zippedPath = mFolder.newFile("zipped").getPath(); - try (FileOutputStream fos = new FileOutputStream(zippedPath)) { - ParallelZipUtils.compress(path, fos, 5, 5); - } - - Path reconstructed = mFolder.newFolder("unzipped").toPath(); + private void tarUntarTest(Path path) throws Exception { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + mMarshaller.write(path, baos); + Path reconstructed = mFolder.newFolder("untarred").toPath(); reconstructed.toFile().delete(); - ParallelZipUtils.decompress(reconstructed, zippedPath, 5); + mMarshaller.read(reconstructed, new ByteArrayInputStream(baos.toByteArray())); FileUtil.assertDirectoriesEqual(path, reconstructed); } } diff --git a/core/server/common/src/test/java/alluxio/util/FileUtil.java b/core/server/common/src/test/java/alluxio/util/compression/FileUtil.java similarity index 98% rename from core/server/common/src/test/java/alluxio/util/FileUtil.java rename to core/server/common/src/test/java/alluxio/util/compression/FileUtil.java index 793c818db94c..937d638717e7 100644 --- a/core/server/common/src/test/java/alluxio/util/FileUtil.java +++ b/core/server/common/src/test/java/alluxio/util/compression/FileUtil.java @@ -9,7 +9,7 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.util; +package alluxio.util.compression; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; diff --git a/core/server/common/src/test/java/alluxio/util/compression/ParallelZipUtilsTest.java b/core/server/common/src/test/java/alluxio/util/compression/ParallelZipUtilsTest.java new file mode 100644 index 000000000000..754911a274cd --- /dev/null +++ b/core/server/common/src/test/java/alluxio/util/compression/ParallelZipUtilsTest.java @@ -0,0 +1,125 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util.compression; + +import alluxio.util.io.FileUtils; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.FileOutputStream; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; + +/** + * Unit tests for {@link ParallelZipUtils}. + */ +public final class ParallelZipUtilsTest { + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + @Test + public void emptyDir() throws Exception { + Path empty = mFolder.newFolder("emptyDir").toPath(); + + zipUnzipTest(empty); + } + + @Test + public void oneFileDir() throws Exception { + Path dir = mFolder.newFolder("oneFileDir").toPath(); + Path file = dir.resolve("file"); + Files.write(file, "test content".getBytes()); + + zipUnzipTest(dir); + } + + @Test + public void tenFileDir() throws Exception { + Path dir = mFolder.newFolder("tenFileDir").toPath(); + for (int i = 0; i < 10; i++) { + Path file = dir.resolve("file" + i); + Files.write(file, ("test content and a lot of test content" + i).getBytes()); + } + + zipUnzipTest(dir); + } + + @Test + public void emptySubDir() throws Exception { + Path dir = mFolder.newFolder("emptySubDir").toPath(); + Path subDir = dir.resolve("subDir"); + Files.createDirectory(subDir); + + zipUnzipTest(dir); + } + + @Test + public void nested() throws Exception { + Path dir = mFolder.newFolder("emptySubDir").toPath(); + Path current = dir; + for (int i = 0; i < 10; i++) { + Path newDir = current.resolve("dir" + i); + Files.createDirectory(newDir); + current = newDir; + } + Path file = current.resolve("file"); + Files.write(file, "hello world".getBytes()); + + zipUnzipTest(dir); + } + + @Test + public void compressionTest() throws Exception { + final String toCompress = "Some string that should be compressed."
+ + "AbAAbAAAAbAAAAAAAAbAAAAAAAAAAAAAAAAAA"; + Path dir = mFolder.newFolder("emptySubDir").toPath(); + Path file = dir.resolve("file"); + Files.write(file, toCompress.getBytes()); + long nonCompressedSize = 0; + long maxCompressedSize = 0; + + for (int compressionLevel = 0; compressionLevel < 10; compressionLevel++) { + String zippedPath = mFolder.newFile("zipped").getPath(); + try (FileOutputStream fos = new FileOutputStream(zippedPath)) { + ParallelZipUtils.compress(dir, fos, 5, compressionLevel); + } + if (compressionLevel == 0) { + nonCompressedSize = Files.size(FileSystems.getDefault().getPath(zippedPath)); + } else { + maxCompressedSize = Files.size(FileSystems.getDefault().getPath(zippedPath)); + } + Path reconstructed = mFolder.newFolder("unzipped").toPath(); + reconstructed.toFile().delete(); + ParallelZipUtils.decompress(reconstructed, zippedPath, 5); + FileUtil.assertDirectoriesEqual(dir, reconstructed); + FileUtils.deletePathRecursively(reconstructed.toString()); + FileUtils.delete(zippedPath); + } + Assert.assertTrue(nonCompressedSize > maxCompressedSize); + } + + private void zipUnzipTest(Path path) throws Exception { + String zippedPath = mFolder.newFile("zipped").getPath(); + try (FileOutputStream fos = new FileOutputStream(zippedPath)) { + ParallelZipUtils.compress(path, fos, 5, -1); + } + + Path reconstructed = mFolder.newFolder("unzipped").toPath(); + reconstructed.toFile().delete(); + ParallelZipUtils.decompress(reconstructed, zippedPath, 5); + FileUtil.assertDirectoriesEqual(path, reconstructed); + } +} diff --git a/core/server/common/src/test/java/alluxio/util/TarUtilsTest.java b/core/server/common/src/test/java/alluxio/util/compression/TarUtilsTest.java similarity index 75% rename from core/server/common/src/test/java/alluxio/util/TarUtilsTest.java rename to core/server/common/src/test/java/alluxio/util/compression/TarUtilsTest.java index c19b43b8c0cf..db95e5cb0bdd 100644 --- a/core/server/common/src/test/java/alluxio/util/TarUtilsTest.java +++ b/core/server/common/src/test/java/alluxio/util/compression/TarUtilsTest.java @@ -9,11 +9,14 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.util; +package alluxio.util.compression; import static org.mockito.ArgumentMatchers.any; +import alluxio.util.io.FileUtils; + import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -112,10 +115,37 @@ public void testLargePosixUserAndGroupIds() throws Exception { private void tarUntarTest(Path path) throws Exception { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - TarUtils.writeTarGz(path, baos); + TarUtils.writeTarGz(path, baos, -1); Path reconstructed = mFolder.newFolder("untarred").toPath(); reconstructed.toFile().delete(); TarUtils.readTarGz(reconstructed, new ByteArrayInputStream(baos.toByteArray())); FileUtil.assertDirectoriesEqual(path, reconstructed); } + + @Test + public void compressionTest() throws Exception { + final String toCompress = "Some string that should be compressed." 
+ + "AbAAbAAAAbAAAAAAAAbAAAAAAAAAAAAAAAAAA"; + Path dir = mFolder.newFolder("emptySubDir").toPath(); + Path file = dir.resolve("file"); + Files.write(file, toCompress.getBytes()); + long nonCompressedSize = 0; + long maxCompressedSize = 0; + + for (int compressionLevel = 0; compressionLevel < 10; compressionLevel++) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + TarUtils.writeTarGz(dir, baos, compressionLevel); + if (compressionLevel == 0) { + nonCompressedSize = baos.size(); + } else { + maxCompressedSize = baos.size(); + } + Path reconstructed = mFolder.newFolder("untarred").toPath(); + reconstructed.toFile().delete(); + TarUtils.readTarGz(reconstructed, new ByteArrayInputStream(baos.toByteArray())); + FileUtil.assertDirectoriesEqual(dir, reconstructed); + FileUtils.deletePathRecursively(reconstructed.toString()); + } + Assert.assertTrue(nonCompressedSize > maxCompressedSize); + } } diff --git a/core/server/master/pom.xml b/core/server/master/pom.xml index 8fe392d7f5de..36c5b7d4576c 100644 --- a/core/server/master/pom.xml +++ b/core/server/master/pom.xml @@ -30,6 +30,10 @@ + + software.amazon.awssdk + s3 + com.google.guava guava @@ -110,6 +114,11 @@ alluxio-job-client ${project.version} + + org.alluxio + alluxio-stress-shell + ${project.version} + @@ -117,6 +126,17 @@ guava-testlib test + + org.gaul + s3proxy + test + + + ch.qos.logback + logback-classic + + + @@ -136,6 +156,12 @@ org.apache.httpcomponents httpclient + + org.alluxio + alluxio-underfs-s3a + ${project.version} + test + diff --git a/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java b/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java index 95e6a21fad20..c387fc37eb28 100644 --- a/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java +++ b/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java @@ -14,6 +14,7 @@ import static alluxio.util.network.NetworkAddressUtils.ServiceType; import alluxio.AlluxioURI; +import alluxio.ProcessUtils; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.AlluxioException; @@ -44,6 +45,7 @@ import alluxio.underfs.UnderFileSystemConfiguration; import alluxio.util.CommonUtils; import alluxio.util.CommonUtils.ProcessType; +import alluxio.util.ThreadFactoryUtils; import alluxio.util.URIUtils; import alluxio.util.WaitForOptions; import alluxio.util.interfaces.Scoped; @@ -60,7 +62,13 @@ import java.io.IOException; import java.io.InputStream; import java.net.URI; +import java.util.ArrayList; +import java.util.List; import java.util.Optional; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; @@ -92,6 +100,12 @@ public class AlluxioMasterProcess extends MasterProcess { /** See {@link #isRunning()}. */ private volatile boolean mRunning = false; + /** last time this process gain primacy in ms. */ + private volatile long mLastGainPrimacyTime = 0; + + /** last time this process lose primacy in ms. */ + private volatile long mLastLosePrimacyTime = 0; + /** * Creates a new {@link AlluxioMasterProcess}. 
*/ @@ -107,6 +121,12 @@ public class AlluxioMasterProcess extends MasterProcess { if (Configuration.getBoolean(PropertyKey.MASTER_THROTTLE_ENABLED)) { mRegistry.get(alluxio.master.throttle.DefaultThrottleMaster.class).setMaster(this); } + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_LAST_GAIN_PRIMACY_TIME.getName(), + () -> mLastGainPrimacyTime); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_LAST_LOSE_PRIMACY_TIME.getName(), + () -> mLastLosePrimacyTime); LOG.info("New process created."); } @@ -182,9 +202,9 @@ public boolean isInSafeMode() { public void start() throws Exception { LOG.info("Process starting."); mRunning = true; - mServices.forEach(SimpleService::start); mJournalSystem.start(); startMasterComponents(false); + mServices.forEach(SimpleService::start); // Perform the initial catchup before joining leader election, // to avoid potential delay if this master is selected as leader @@ -208,6 +228,7 @@ public void start() throws Exception { LOG.info("Started in stand-by mode."); mLeaderSelector.waitForState(NodeState.PRIMARY); + mLastGainPrimacyTime = CommonUtils.getCurrentMs(); if (!mRunning) { break; } @@ -224,15 +245,38 @@ public void start() throws Exception { throw t; } mLeaderSelector.waitForState(NodeState.STANDBY); + mLastLosePrimacyTime = CommonUtils.getCurrentMs(); if (Configuration.getBoolean(PropertyKey.MASTER_JOURNAL_EXIT_ON_DEMOTION)) { stop(); } else { if (!mRunning) { break; } + // Dump important information asynchronously + ExecutorService es = null; + List<Future<Void>> dumpFutures = new ArrayList<>(); + try { + es = Executors.newFixedThreadPool( + 2, ThreadFactoryUtils.build("info-dumper-%d", true)); + dumpFutures.addAll(ProcessUtils.dumpInformationOnFailover(es)); + } catch (Throwable t) { + LOG.warn("Failed to dump metrics and jstacks before demotion", t); + } + // Shut down services like RPC, WebServer, Journal and all master components LOG.info("Losing the leadership."); mServices.forEach(SimpleService::demote); demote(); + // Block until information dump is done and close resources + for (Future<Void> f : dumpFutures) { + try { + f.get(); + } catch (InterruptedException | ExecutionException e) { + LOG.warn("Failed to dump metrics and jstacks before demotion", e); + } + } + if (es != null) { + es.shutdownNow(); + } } } } @@ -263,6 +307,7 @@ private boolean promote() throws Exception { if (unstable.get()) { LOG.info("Terminating an unstable attempt to become a leader."); if (Configuration.getBoolean(PropertyKey.MASTER_JOURNAL_EXIT_ON_DEMOTION)) { + ProcessUtils.dumpInformationOnExit(); stop(); } else { demote(); @@ -288,7 +333,6 @@ private void demote() throws Exception { // sockets in stopServing so that clients don't see NPEs. mJournalSystem.losePrimacy(); stopMasterComponents(); - LOG.info("Primary stopped"); startMasterComponents(false); LOG.info("Standby started"); } diff --git a/core/server/master/src/main/java/alluxio/master/AlluxioSimpleMasterProcess.java b/core/server/master/src/main/java/alluxio/master/AlluxioSimpleMasterProcess.java index 4626e0d57149..7950dde7335a 100644 --- a/core/server/master/src/main/java/alluxio/master/AlluxioSimpleMasterProcess.java +++ b/core/server/master/src/main/java/alluxio/master/AlluxioSimpleMasterProcess.java @@ -80,12 +80,16 @@ public void start() throws Exception { mLeaderSelector.start(getRpcAddress()); while (!Thread.interrupted()) { + // Start the master components in standby mode + // E.g.
for job master they are the JobMaster and JournalMaster + startMasterComponents(false); + LOG.info("Standby started"); // We are in standby mode. Nothing to do until we become the primary. mLeaderSelector.waitForState(NodeState.PRIMARY); LOG.info("Transitioning from standby to primary"); mJournalSystem.gainPrimacy(); stopMasterComponents(); - LOG.info("Secondary stopped"); + LOG.info("Standby stopped"); startMasterComponents(true); mServices.forEach(SimpleService::promote); LOG.info("Primary started"); @@ -96,8 +100,6 @@ public void start() throws Exception { stopMasterComponents(); mJournalSystem.losePrimacy(); LOG.info("Primary stopped"); - startMasterComponents(false); - LOG.info("Standby started"); } } diff --git a/core/server/master/src/main/java/alluxio/master/CoreMaster.java b/core/server/master/src/main/java/alluxio/master/CoreMaster.java index 3ebafa223caa..1fa97dc82f93 100644 --- a/core/server/master/src/main/java/alluxio/master/CoreMaster.java +++ b/core/server/master/src/main/java/alluxio/master/CoreMaster.java @@ -24,6 +24,7 @@ public abstract class CoreMaster extends AbstractMaster { protected final SafeModeManager mSafeModeManager; protected final BackupManager mBackupManager; protected final JournalSystem mJournalSystem; + protected final PrimarySelector mPrimarySelector; protected final long mStartTimeMs; protected final int mPort; @@ -38,6 +39,7 @@ protected CoreMaster(CoreMasterContext context, Clock clock, mSafeModeManager = context.getSafeModeManager(); mBackupManager = context.getBackupManager(); mJournalSystem = context.getJournalSystem(); + mPrimarySelector = context.getPrimarySelector(); mStartTimeMs = context.getStartTimeMs(); mPort = context.getPort(); } diff --git a/core/server/master/src/main/java/alluxio/master/CoreMasterContext.java b/core/server/master/src/main/java/alluxio/master/CoreMasterContext.java index 69ea2a318314..220d1b2374e0 100644 --- a/core/server/master/src/main/java/alluxio/master/CoreMasterContext.java +++ b/core/server/master/src/main/java/alluxio/master/CoreMasterContext.java @@ -19,6 +19,8 @@ import com.google.common.base.Preconditions; +import javax.annotation.Nullable; + /** * This class stores fields that are specific to core masters. 
*/ @@ -28,6 +30,8 @@ public class CoreMasterContext extends MasterContext { private final BlockMetaStore.Factory mBlockStoreFactory; private final InodeStore.Factory mInodeStoreFactory; private final JournalSystem mJournalSystem; + @Nullable + private final PrimarySelector mPrimarySelector; private final long mStartTimeMs; private final int mPort; @@ -44,6 +48,7 @@ private CoreMasterContext(Builder builder) { mJournalSystem = Preconditions.checkNotNull(builder.mJournalSystem, "journalSystem"); mStartTimeMs = builder.mStartTimeMs; mPort = builder.mPort; + mPrimarySelector = builder.mPrimarySelector; } /** @@ -93,6 +98,13 @@ public int getPort() { return mPort; } + /** + * @return the leader selector + */ + public @Nullable PrimarySelector getPrimarySelector() { + return mPrimarySelector; + } + /** * @return a new builder */ diff --git a/core/server/master/src/main/java/alluxio/master/MasterProcess.java b/core/server/master/src/main/java/alluxio/master/MasterProcess.java index 585dc889fb03..0b4228a94065 100644 --- a/core/server/master/src/main/java/alluxio/master/MasterProcess.java +++ b/core/server/master/src/main/java/alluxio/master/MasterProcess.java @@ -21,6 +21,7 @@ import alluxio.master.service.SimpleService; import alluxio.master.service.rpc.RpcServerService; import alluxio.master.service.web.WebServerService; +import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; import alluxio.util.CommonUtils; import alluxio.util.ConfigurationUtils; @@ -90,6 +91,7 @@ public MasterProcess(JournalSystem journalSystem, PrimarySelector leaderSelector mRpcConnectAddress = NetworkAddressUtils.getConnectAddress(rpcService, Configuration.global()); mWebConnectAddress = NetworkAddressUtils.getConnectAddress(webService, Configuration.global()); mStartTimeMs = System.currentTimeMillis(); + MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_START_TIME.getName(), () -> mStartTimeMs); } private static InetSocketAddress configureAddress(ServiceType service) { @@ -210,9 +212,9 @@ public final InetSocketAddress getWebAddress() { /** * @return true if the system is the leader (serving the rpc server), false otherwise */ - public boolean isGrpcServing() { + public boolean isGrpcServingAsLeader() { return mServices.stream().anyMatch(service -> service instanceof RpcServerService - && ((RpcServerService) service).isServing()); + && ((RpcServerService) service).isServingLeader()); } /** @@ -236,8 +238,8 @@ public boolean isMetricSinkServing() { * @param timeoutMs how long to wait in milliseconds * @return whether the grpc server became ready before the specified timeout */ - public boolean waitForGrpcServerReady(int timeoutMs) { - return pollFor(this + " to start", this::isGrpcServing, timeoutMs); + public boolean waitForLeaderGrpcServerReady(int timeoutMs) { + return pollFor(this + " to start", this::isGrpcServingAsLeader, timeoutMs); } /** @@ -274,6 +276,13 @@ private boolean pollFor(String message, Supplier waitFor, int timeoutMs @Override public boolean waitForReady(int timeoutMs) { - return waitForGrpcServerReady(timeoutMs); + return waitForLeaderGrpcServerReady(timeoutMs); + } + + /** + * @return the primary selector + */ + public PrimarySelector getPrimarySelector() { + return mLeaderSelector; } } diff --git a/core/server/master/src/main/java/alluxio/master/ProtobufUtils.java b/core/server/master/src/main/java/alluxio/master/ProtobufUtils.java index d5453976c4a7..82897a147d58 100644 --- a/core/server/master/src/main/java/alluxio/master/ProtobufUtils.java +++ 
b/core/server/master/src/main/java/alluxio/master/ProtobufUtils.java @@ -32,9 +32,11 @@ private ProtobufUtils() {} // prevent instantiation */ public static TtlAction fromProtobuf(PTtlAction pTtlAction) { if (pTtlAction == null) { - return TtlAction.DELETE; + return TtlAction.DELETE_ALLUXIO; } switch (pTtlAction) { + case DELETE_ALLUXIO: + return TtlAction.DELETE_ALLUXIO; case DELETE: return TtlAction.DELETE; case FREE: @@ -52,9 +54,11 @@ public static TtlAction fromProtobuf(PTtlAction pTtlAction) { */ public static PTtlAction toProtobuf(TtlAction ttlAction) { if (ttlAction == null) { - return PTtlAction.DELETE; + return PTtlAction.DELETE_ALLUXIO; } switch (ttlAction) { + case DELETE_ALLUXIO: + return PTtlAction.DELETE_ALLUXIO; case DELETE: return PTtlAction.DELETE; case FREE: diff --git a/core/server/master/src/main/java/alluxio/master/backup/BackupLeaderRole.java b/core/server/master/src/main/java/alluxio/master/backup/BackupLeaderRole.java index 80e9c7590171..a09577ff9113 100644 --- a/core/server/master/src/main/java/alluxio/master/backup/BackupLeaderRole.java +++ b/core/server/master/src/main/java/alluxio/master/backup/BackupLeaderRole.java @@ -30,7 +30,7 @@ import alluxio.master.transport.GrpcMessagingConnection; import alluxio.master.transport.GrpcMessagingServiceClientHandler; import alluxio.resource.LockResource; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.util.ConfigurationUtils; import alluxio.util.network.NetworkAddressUtils; import alluxio.wire.BackupStatus; @@ -144,7 +144,7 @@ public Map getRoleServices() { Configuration.global()), (conn) -> activateWorkerConnection(conn), mGrpcMessagingContext, mExecutorService, mCatalystRequestTimeout), - new ClientIpAddressInjector())).withCloseable(this)); + new ClientContextServerInjector())).withCloseable(this)); return services; } diff --git a/core/server/master/src/main/java/alluxio/master/backup/BackupWorkerRole.java b/core/server/master/src/main/java/alluxio/master/backup/BackupWorkerRole.java index fb1c57b32920..be86c591d575 100644 --- a/core/server/master/src/main/java/alluxio/master/backup/BackupWorkerRole.java +++ b/core/server/master/src/main/java/alluxio/master/backup/BackupWorkerRole.java @@ -13,6 +13,7 @@ import alluxio.AlluxioURI; import alluxio.ClientContext; +import alluxio.Constants; import alluxio.ProcessUtils; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; @@ -33,6 +34,7 @@ import alluxio.master.transport.Listener; import alluxio.retry.ExponentialBackoffRetry; import alluxio.retry.RetryPolicy; +import alluxio.util.logging.SamplingLogger; import alluxio.util.network.NetworkAddressUtils; import alluxio.wire.BackupStatus; @@ -55,6 +57,7 @@ */ public class BackupWorkerRole extends AbstractBackupRole { private static final Logger LOG = LoggerFactory.getLogger(BackupWorkerRole.class); + private static final Logger SAMPLING_LOG = new SamplingLogger(LOG, 10L * Constants.SECOND_MS); // Constant timeout for journal transition before backup. 
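An aside on the SamplingLogger introduced above: it exists to keep the retry loop that waits out a leader promotion from flooding the log, emitting the same message at most once per 10-second window. A minimal sketch of the idea follows; this is an illustration with invented names, not Alluxio's implementation:

```java
import java.util.concurrent.atomic.AtomicLong;

import org.slf4j.Logger;

/** Sketch of a rate-limited logger; names here are illustrative. */
final class ThrottledLogger {
  private final Logger mDelegate;
  private final long mIntervalMs;
  private final AtomicLong mNextAllowedMs = new AtomicLong();

  ThrottledLogger(Logger delegate, long intervalMs) {
    mDelegate = delegate;
    mIntervalMs = intervalMs;
  }

  /** Logs at most once per interval; calls inside the window are dropped. */
  void info(String format, Object... args) {
    long now = System.currentTimeMillis();
    long next = mNextAllowedMs.get();
    if (now >= next && mNextAllowedMs.compareAndSet(next, now + mIntervalMs)) {
      mDelegate.info(format, args);
    }
  }
}
```

The compare-and-set ensures that when several retrying threads race inside one window, only one of them actually logs.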
private static final long BACKUP_ABORT_AFTER_TRANSITION_TIMEOUT_MS = 30000; @@ -370,6 +373,12 @@ private void establishConnectionToLeader() { .build().getMasterInquireClient(); leaderAddress = inquireClient.getPrimaryRpcAddress(); + InetSocketAddress localAddress = NetworkAddressUtils.getConnectAddress( + NetworkAddressUtils.ServiceType.MASTER_RPC, Configuration.global()); + if (leaderAddress.equals(localAddress)) { + SAMPLING_LOG.info("Currently being promoted to leader"); + continue; + } } catch (Throwable t) { LOG.warn("Failed to get backup-leader address. Error:{}. Attempt:{}", t, infiniteRetryPolicy.getAttemptCount()); diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockContainerIdGenerator.java b/core/server/master/src/main/java/alluxio/master/block/BlockContainerIdGenerator.java index d5297852016a..e09a8ccd4acf 100644 --- a/core/server/master/src/main/java/alluxio/master/block/BlockContainerIdGenerator.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockContainerIdGenerator.java @@ -41,6 +41,13 @@ public long getNextContainerId() { return mNextContainerId.get(); } + /** + * @return the next container id + */ + public long peekNewContainerId() { + return mNextContainerId.get(); + } + /** * @param id the next container id to use */ diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java index 4d8512df91cf..1414a02cd2be 100644 --- a/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java @@ -19,13 +19,16 @@ import alluxio.exception.status.UnavailableException; import alluxio.grpc.Command; import alluxio.grpc.ConfigProperty; +import alluxio.grpc.DecommissionWorkerPOptions; import alluxio.grpc.GetRegisterLeasePRequest; import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.RegisterWorkerPRequest; +import alluxio.grpc.RemoveDisabledWorkerPOptions; import alluxio.grpc.StorageList; import alluxio.grpc.WorkerLostStorageInfo; import alluxio.master.Master; import alluxio.master.block.meta.MasterWorkerInfo; +import alluxio.master.journal.JournalContext; import alluxio.metrics.Metric; import alluxio.proto.meta.Block; import alluxio.wire.Address; @@ -121,12 +124,18 @@ List getWorkerReport(GetWorkerReportOptions options) */ List getWorkerLostStorage(); + /** + * @param address worker address to check + * @return true if the worker is excluded, otherwise false + */ + boolean isRejected(WorkerNetAddress address); + /** * Decommission a worker. * - * @param workerId the WorkerInfo of worker to be decommissioned + * @param requestOptions the request */ - void decommissionWorker(long workerId) throws Exception; + void decommissionWorker(DecommissionWorkerPOptions requestOptions) throws NotFoundException; /** * Removes blocks from workers. @@ -169,7 +178,30 @@ void commitBlock(long workerId, long usedBytesOnTier, String tierAlias, * @param blockId the id of the block to commit * @param length the length of the block */ - void commitBlockInUFS(long blockId, long length) throws UnavailableException; + default void commitBlockInUFS(long blockId, long length) throws UnavailableException { + try (JournalContext journalContext = createJournalContext()) { + commitBlockInUFS(blockId, length, journalContext); + } + } + + /** + * Marks a block as committed, but without a worker location. This means the block is only in ufs. 
+ * Append any created journal entries to the included context. + * @param blockId the id of the block to commit + * @param length the length of the block + * @param context the journal context + */ + void commitBlockInUFS(long blockId, long length, JournalContext context); + + /** + * Marks a block as committed, but without a worker location. This means the block is only in ufs. + * Append any created journal entries to the included context. + * @param blockId the id of the block to commit + * @param length the length of the block + * @param context the journal context + * @param checkExists checks if the block exists + */ + void commitBlockInUFS(long blockId, long length, JournalContext context, boolean checkExists); /** * @param blockId the block id to get information for @@ -372,10 +404,15 @@ void workerRegisterStream( long getJournaledNextContainerId(); /** - * Removes all associated metadata about the decommissioned worker from block master. - * - * The worker to free must have been decommissioned. - * @param workerId the workerId of target worker + * Revert disabling a worker, enabling it to register to the cluster. + * @param requestOptions the request + */ + void removeDisabledWorker(RemoveDisabledWorkerPOptions requestOptions) throws NotFoundException; + + /** + * Notify a master of the worker id. + * @param workerId the worker id + * @param workerNetAddress the worker address */ - void removeDecommissionedWorker(long workerId) throws NotFoundException; + void notifyWorkerId(long workerId, WorkerNetAddress workerNetAddress); } diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java index deb5c66f5125..79acbf755115 100644 --- a/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java @@ -13,10 +13,11 @@ import alluxio.RpcUtils; import alluxio.client.block.options.GetWorkerReportOptions; -import alluxio.exception.status.NotFoundException; import alluxio.grpc.BlockMasterClientServiceGrpc; import alluxio.grpc.BlockMasterInfo; import alluxio.grpc.BlockMasterInfoField; +import alluxio.grpc.DecommissionWorkerPOptions; +import alluxio.grpc.DecommissionWorkerPResponse; import alluxio.grpc.GetBlockInfoPOptions; import alluxio.grpc.GetBlockInfoPRequest; import alluxio.grpc.GetBlockInfoPResponse; @@ -32,10 +33,8 @@ import alluxio.grpc.GetWorkerLostStoragePResponse; import alluxio.grpc.GetWorkerReportPOptions; import alluxio.grpc.GrpcUtils; -import alluxio.grpc.RemoveDecommissionedWorkerPOptions; -import alluxio.grpc.RemoveDecommissionedWorkerPResponse; -import alluxio.grpc.WorkerRange; -import alluxio.wire.WorkerInfo; +import alluxio.grpc.RemoveDisabledWorkerPOptions; +import alluxio.grpc.RemoveDisabledWorkerPResponse; import com.google.common.base.Preconditions; import io.grpc.stub.StreamObserver; @@ -43,7 +42,6 @@ import org.slf4j.LoggerFactory; import java.util.Arrays; -import java.util.List; import java.util.stream.Collectors; /** @@ -145,21 +143,13 @@ public void getWorkerInfoList(GetWorkerInfoListPOptions options, } @Override - public void removeDecommissionedWorker(RemoveDecommissionedWorkerPOptions options, - StreamObserver<RemoveDecommissionedWorkerPResponse> responseObserver) { + public void removeDisabledWorker(RemoveDisabledWorkerPOptions options, + StreamObserver<RemoveDisabledWorkerPResponse> responseObserver) { RpcUtils.call(LOG, () -> { - List<WorkerInfo> decommissionedWorkers = mBlockMaster.getWorkerReport( - new GetWorkerReportOptions(GetWorkerReportPOptions.newBuilder() - .setWorkerRange(WorkerRange.DECOMMISSIONED).build())); - for (WorkerInfo worker : decommissionedWorkers) { - if (worker.getAddress().getHost().equals(options.getWorkerName())) { - mBlockMaster.removeDecommissionedWorker(worker.getId()); - return RemoveDecommissionedWorkerPResponse.getDefaultInstance(); - } - } - // Exception info has been added in FreeWorkerCommand. - throw new NotFoundException(options.getWorkerName()); - }, "RemoveDecommissionedWorker", "options=%s", responseObserver, options); + // This command is idempotent and is no-op if the address is not recognized + mBlockMaster.removeDisabledWorker(options); + return RemoveDisabledWorkerPResponse.getDefaultInstance(); + }, "RemoveDisabledWorker", "options=%s", responseObserver, options); } @Override @@ -180,4 +170,13 @@ public void getWorkerLostStorage(GetWorkerLostStoragePOptions options, .addAllWorkerLostStorageInfo(mBlockMaster.getWorkerLostStorage()).build(), "GetWorkerLostStorage", "options=%s", responseObserver, options); } + + @Override + public void decommissionWorker(DecommissionWorkerPOptions options, + StreamObserver<DecommissionWorkerPResponse> responseObserver) { + RpcUtils.call(LOG, () -> { + mBlockMaster.decommissionWorker(options); + return DecommissionWorkerPResponse.getDefaultInstance(); + }, "DecommissionWorker", "request=%s", responseObserver, options); + } } diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockMasterWorkerServiceHandler.java b/core/server/master/src/main/java/alluxio/master/block/BlockMasterWorkerServiceHandler.java index 53695cda4108..45f15da74213 100644 --- a/core/server/master/src/main/java/alluxio/master/block/BlockMasterWorkerServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockMasterWorkerServiceHandler.java @@ -28,6 +28,8 @@ import alluxio.grpc.GetWorkerIdPResponse; import alluxio.grpc.GrpcUtils; import alluxio.grpc.LocationBlockIdListEntry; +import alluxio.grpc.NotifyWorkerIdPRequest; +import alluxio.grpc.NotifyWorkerIdPResponse; import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.RegisterWorkerPRequest; import alluxio.grpc.RegisterWorkerPResponse; @@ -149,11 +151,12 @@ public void registerWorker(RegisterWorkerPRequest request, final long workerId = request.getWorkerId(); RegisterWorkerPOptions options = request.getOptions(); + final boolean leaseEnabled = + Configuration.getBoolean(PropertyKey.MASTER_WORKER_REGISTER_LEASE_ENABLED); RpcUtils.call(LOG, () -> { // The exception will be propagated to the worker side and the worker should retry. - if (Configuration.getBoolean(PropertyKey.MASTER_WORKER_REGISTER_LEASE_ENABLED) - && !mBlockMaster.hasRegisterLease(workerId)) { + if (leaseEnabled && !mBlockMaster.hasRegisterLease(workerId)) { String errorMsg = String.format("Worker %s does not have a lease or the lease " + "has expired. The worker should acquire a new lease and retry to register.", workerId); @@ -172,9 +175,13 @@ public void registerWorker(RegisterWorkerPRequest request, // If the register is unsuccessful, the lease will be kept around until the expiry. // The worker can retry and use the existing lease.
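All of these handlers share one shape: run the master call inside RpcUtils.call, return a default-instance response, and let the utility translate exceptions onto the StreamObserver. A condensed sketch of that wrapping pattern, with invented names standing in for Alluxio's helpers:

```java
import io.grpc.Status;
import io.grpc.stub.StreamObserver;

/** Sketch of the call-wrapping idiom used by the handlers above. */
final class RpcCallSketch {
  interface RpcCallable<T> {
    T call() throws Exception;
  }

  /** Completes the observer on success; maps any failure to a gRPC status. */
  static <T> void call(RpcCallable<T> callable, StreamObserver<T> responseObserver) {
    try {
      responseObserver.onNext(callable.call());
      responseObserver.onCompleted();
    } catch (Exception e) {
      responseObserver.onError(Status.INTERNAL
          .withDescription(String.valueOf(e.getMessage()))
          .withCause(e)
          .asRuntimeException());
    }
  }
}
```

Centralizing this keeps each handler down to the one line that matters, as the registerWorker body below continues to show.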
mBlockMaster.workerRegister(workerId, storageTiers, totalBytesOnTiers, usedBytesOnTiers, - currBlocksOnLocationMap, lostStorageMap, options); - LOG.info("Worker {} finished registering, releasing its lease.", workerId); - mBlockMaster.releaseRegisterLease(workerId); + currBlocksOnLocationMap, lostStorageMap, options); + if (leaseEnabled) { + LOG.info("Worker {} finished registering, releasing its lease.", workerId); + mBlockMaster.releaseRegisterLease(workerId); + } else { + LOG.info("Worker {} finished registering.", workerId); + } return RegisterWorkerPResponse.getDefaultInstance(); }, "registerWorker", true, "request=%s", responseObserver, workerId); } @@ -213,4 +220,15 @@ static Map> reconstructBlocksOnLocationMap( + "with LocationBlockIdListEntry objects %s", workerId, entryReport)); })); } + + @Override + public void notifyWorkerId( + NotifyWorkerIdPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + mBlockMaster.notifyWorkerId(request.getWorkerId(), + GrpcUtils.fromProto(request.getWorkerNetAddress())); + return alluxio.grpc.NotifyWorkerIdPResponse.getDefaultInstance(); + }, "notifyWorkerId", "request=%s", responseObserver, request); + } } diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index 7a44abbb0611..6100141fbd46 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -15,7 +15,6 @@ import alluxio.DefaultStorageTierAssoc; import alluxio.Server; import alluxio.StorageTierAssoc; -import alluxio.annotation.SuppressFBWarnings; import alluxio.client.block.options.GetWorkerReportOptions; import alluxio.client.block.options.GetWorkerReportOptions.WorkerRange; import alluxio.clock.SystemClock; @@ -26,29 +25,37 @@ import alluxio.conf.PropertyKey; import alluxio.exception.BlockInfoException; import alluxio.exception.ExceptionMessage; +import alluxio.exception.runtime.UnavailableRuntimeException; import alluxio.exception.status.InvalidArgumentException; import alluxio.exception.status.NotFoundException; import alluxio.exception.status.UnavailableException; import alluxio.grpc.Command; import alluxio.grpc.CommandType; import alluxio.grpc.ConfigProperty; +import alluxio.grpc.DecommissionWorkerPOptions; import alluxio.grpc.GetRegisterLeasePRequest; import alluxio.grpc.GrpcService; import alluxio.grpc.GrpcUtils; +import alluxio.grpc.NodeState; import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.RegisterWorkerPRequest; +import alluxio.grpc.RemoveDisabledWorkerPOptions; import alluxio.grpc.ServiceType; import alluxio.grpc.StorageList; import alluxio.grpc.WorkerLostStorageInfo; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatExecutor; import alluxio.heartbeat.HeartbeatThread; import alluxio.master.CoreMaster; import alluxio.master.CoreMasterContext; +import alluxio.master.WorkerState; import alluxio.master.block.meta.MasterWorkerInfo; import alluxio.master.block.meta.WorkerMetaLockSection; import alluxio.master.journal.JournalContext; +import alluxio.master.journal.SingleEntryJournaled; import alluxio.master.journal.checkpoint.CheckpointName; +import alluxio.master.journal.checkpoint.Checkpointed; import alluxio.master.metastore.BlockMetaStore; import alluxio.master.metastore.BlockMetaStore.Block; import 
alluxio.master.metrics.MetricsMaster; @@ -64,12 +71,15 @@ import alluxio.proto.meta.Block.BlockMeta; import alluxio.resource.CloseableIterator; import alluxio.resource.LockResource; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.util.CommonUtils; import alluxio.util.IdUtils; import alluxio.util.ThreadFactoryUtils; +import alluxio.util.WaitForOptions; import alluxio.util.executor.ExecutorServiceFactories; import alluxio.util.executor.ExecutorServiceFactory; import alluxio.util.network.NetworkAddressUtils; +import alluxio.util.proto.BlockLocationUtils; import alluxio.wire.Address; import alluxio.wire.BlockInfo; import alluxio.wire.RegisterLease; @@ -83,10 +93,12 @@ import com.google.common.cache.LoadingCache; import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.Striped; +import io.grpc.ServerInterceptors; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; import java.io.IOException; import java.net.UnknownHostException; import java.time.Clock; @@ -104,12 +116,14 @@ import java.util.NoSuchElementException; import java.util.Optional; import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.function.BiConsumer; @@ -164,6 +178,11 @@ public class DefaultBlockMaster extends CoreMaster implements BlockMaster { private static final Logger LOG = LoggerFactory.getLogger(DefaultBlockMaster.class); + private static final String WORKER_DISABLED = + "Worker with address %s is manually decommissioned and marked not able to join " + + "the cluster again. If you want this worker to register to the cluster again, " + + "use `bin/alluxio fsadmin enableWorker -h <hostname>` command."; + /** * Concurrency and locking in the BlockMaster * @@ -232,6 +251,7 @@ public class DefaultBlockMaster extends CoreMaster implements BlockMaster { /** Worker is not visualable until registration completes. */ private final IndexedSet<MasterWorkerInfo> mTempWorkers = new IndexedSet<>(ID_INDEX, ADDRESS_INDEX); + private final Set<WorkerNetAddress> mRejectWorkers = new ConcurrentHashSet<>(); /** * Keeps track of workers which have been decommissioned. * For we need to distinguish the lost worker accidentally and the decommissioned worker manually. @@ -264,12 +284,6 @@ public class DefaultBlockMaster extends CoreMaster implements BlockMaster { /** Handle to the metrics master. */ private final MetricsMaster mMetricsMaster; - /** - * The service that detects lost worker nodes, and tries to restart the failed workers. - * We store it here so that it can be accessed from tests. - */ - @SuppressFBWarnings("URF_UNREAD_FIELD") - /* The value of the 'next container id' last journaled.
*/ @GuardedBy("mBlockContainerIdGenerator") private volatile long mJournaledNextContainerId = 0; @@ -282,6 +296,14 @@ public class DefaultBlockMaster extends CoreMaster implements BlockMaster { private final RegisterLeaseManager mRegisterLeaseManager = new RegisterLeaseManager(); + private final HashMap mWorkerIdMap = new HashMap<>(); + + private final boolean mWorkerRegisterToAllMasters = Configuration.getBoolean( + PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS); + + private final boolean mStandbyMasterRpcEnabled = Configuration.getBoolean( + PropertyKey.STANDBY_MASTER_GRPC_ENABLED); + /** * Creates a new instance of {@link DefaultBlockMaster}. * @@ -344,12 +366,24 @@ public String getName() { public Map<ServiceType, GrpcService> getServices() { Map<ServiceType, GrpcService> services = new HashMap<>(); services.put(ServiceType.BLOCK_MASTER_CLIENT_SERVICE, - new GrpcService(new BlockMasterClientServiceHandler(this))); + new GrpcService(ServerInterceptors + .intercept(new BlockMasterClientServiceHandler(this), + new ClientContextServerInjector()))); services.put(ServiceType.BLOCK_MASTER_WORKER_SERVICE, - new GrpcService(new BlockMasterWorkerServiceHandler(this))); + new GrpcService(ServerInterceptors + .intercept(new BlockMasterWorkerServiceHandler(this), + new ClientContextServerInjector()))); return services; } + @Override + public Map<ServiceType, GrpcService> getStandbyServices() { + if (Configuration.getBoolean(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS)) { + return getServices(); + } + return Collections.emptyMap(); + } + @Override public boolean processJournalEntry(JournalEntry entry) { // TODO(gene): A better way to process entries besides a huge switch? @@ -363,6 +397,9 @@ public boolean processJournalEntry(JournalEntry entry) { long length = blockInfoEntry.getLength(); Optional<BlockMeta> block = mBlockMetaStore.getBlock(blockInfoEntry.getBlockId()); if (block.isPresent()) { + // If we write multiple replicas, multiple streams will all write BlockInfoEntry + // when they CommitBlock. We rely on the idempotence to handle duplicate entries + // and only warn when there are inconsistencies. long oldLen = block.get().getLength(); if (oldLen != Constants.UNKNOWN_SIZE) { LOG.warn("Attempting to update block length ({}) to a different length ({}).", oldLen, @@ -372,6 +409,27 @@ } mBlockMetaStore.putBlock(blockInfoEntry.getBlockId(), BlockMeta.newBuilder().setLength(blockInfoEntry.getLength()).build()); + // This can be called when + // 1. The master is replaying the journal. + // 2. A standby master is applying a journal entry from the primary master.
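The branch that follows applies an optional journaled block location, and it has to stay idempotent: the same BlockInfoEntry can be seen during journal replay and again when a standby applies live entries from the primary, and the worker it references may no longer be registered. A simplified sketch of that decision flow, using plain collections rather than Alluxio's metastore classes:

```java
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

/** Simplified replay of a block-length entry with an optional worker location. */
final class BlockEntryReplaySketch {
  final Map<Long, Long> mLengths = new ConcurrentHashMap<>();
  final Map<Long, Set<Long>> mBlocksByWorker = new ConcurrentHashMap<>();

  void apply(long blockId, long length, Long workerId, Set<Long> registeredWorkers) {
    Long old = mLengths.put(blockId, length);
    if (old != null && old != length) {
      // duplicates are expected with multiple replicas; only a mismatch is suspicious
      System.err.printf("block %d length %d -> %d%n", blockId, old, length);
    }
    // skip unknown workers: if one registers again later, it rebuilds its locations anyway
    if (workerId != null && registeredWorkers.contains(workerId)) {
      mBlocksByWorker.computeIfAbsent(workerId, k -> ConcurrentHashMap.newKeySet())
          .add(blockId);
    }
  }
}
```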
+ if (blockInfoEntry.hasBlockLocation()) { + alluxio.grpc.BlockLocation blockLocation = blockInfoEntry.getBlockLocation(); + long workerId = blockLocation.getWorkerId(); + MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); + if (worker == null) { + // The master is replaying journal or somehow the worker is not there anymore + // We do not add the BlockLocation because the workerId is not reliable anymore + // If the worker comes back, it will register and BlockLocation will be added then + return true; + } + // The master is running and the journal is from an existing worker + mBlockMetaStore.addLocation(blockInfoEntry.getBlockId(), BlockLocationUtils.getCached( + workerId, blockLocation.getTierAlias(), blockLocation.getMediumType()) + ); + + worker.addBlock(blockInfoEntry.getBlockId()); + LOG.debug("Added BlockLocation for {} to worker {}", blockInfoEntry.getBlockId(), workerId); + } } else { return false; } @@ -387,9 +445,38 @@ public void resetState() { @Override public CheckpointName getCheckpointName() { + if (mBlockMetaStore instanceof Checkpointed) { + return ((Checkpointed) mBlockMetaStore).getCheckpointName(); + } return CheckpointName.BLOCK_MASTER; } + @Override + public CompletableFuture<Void> writeToCheckpoint(File directory, + ExecutorService executorService) { + if (mBlockMetaStore instanceof Checkpointed) { + SingleEntryJournaled containerIdJournal = new DefaultBlockMasterContainerIdJournaled(); + containerIdJournal.processJournalEntry(getContainerIdJournalEntry()); + return CompletableFuture.allOf(( + (Checkpointed) mBlockMetaStore).writeToCheckpoint(directory, executorService), + containerIdJournal.writeToCheckpoint(directory, executorService)); + } + return super.writeToCheckpoint(directory, executorService); + } + + @Override + public CompletableFuture<Void> restoreFromCheckpoint(File directory, + ExecutorService executorService) { + if (mBlockMetaStore instanceof Checkpointed) { + SingleEntryJournaled containerIdJournal = new DefaultBlockMasterContainerIdJournaled(); + return CompletableFuture.allOf(( + (Checkpointed) mBlockMetaStore).restoreFromCheckpoint(directory, executorService), + containerIdJournal.restoreFromCheckpoint(directory, executorService) + .thenRun(() -> processJournalEntry(containerIdJournal.getEntry()))); + } + return super.restoreFromCheckpoint(directory, executorService); + } + @Override public CloseableIterator<JournalEntry> getJournalEntryIterator() { CloseableIterator<Block> blockStoreIterator = mBlockMetaStore.getCloseableIterator(); @@ -404,6 +491,10 @@ public JournalEntry next() { if (!hasNext()) { throw new NoSuchElementException(); } + /* + * When the BlockStore is RocksBlockMetaStore, thread safety is embedded in the iterator. + * So no need to worry if the RocksDB is closed while this iterator is active.
+ */ Block block = blockStoreIterator.next(); BlockInfoEntry blockInfoEntry = BlockInfoEntry.newBuilder().setBlockId(block.getId()) @@ -436,7 +527,7 @@ public class WorkerRegisterStreamGCExecutor implements HeartbeatExecutor { .getMs(PropertyKey.MASTER_WORKER_REGISTER_STREAM_RESPONSE_TIMEOUT); @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { AtomicInteger removedSessions = new AtomicInteger(0); mActiveRegisterContexts.entrySet().removeIf((entry) -> { WorkerRegisterContext context = entry.getValue(); @@ -478,10 +569,11 @@ public void close() { @Override public void start(Boolean isLeader) throws IOException { super.start(isLeader); - if (isLeader) { + if (isLeader || mWorkerRegisterToAllMasters) { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_LOST_WORKER_DETECTION, new LostWorkerDetectionHeartbeatExecutor(), - (int) Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_DETECTION_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_DETECTION_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); } @@ -489,12 +581,14 @@ HeartbeatContext.MASTER_LOST_WORKER_DETECTION, new LostWorkerDetectionHeartbeatE getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_WORKER_REGISTER_SESSION_CLEANER, new WorkerRegisterStreamGCExecutor(), - (int) Configuration.getMs(PropertyKey.MASTER_WORKER_REGISTER_STREAM_RESPONSE_TIMEOUT), + () -> new FixedIntervalSupplier(Configuration.getMs( + PropertyKey.MASTER_WORKER_REGISTER_STREAM_RESPONSE_TIMEOUT)), Configuration.global(), mMasterContext.getUserState())); } @Override public void stop() throws IOException { + LOG.info("Next container id before close: {}", mBlockContainerIdGenerator.peekNewContainerId()); super.stop(); } @@ -583,11 +677,13 @@ public List<WorkerInfo> getWorkerInfoList() throws UnavailableException { } private List<WorkerInfo> constructWorkerInfoList() { + // TODO(jiacheng): investigate why this cache is refreshed so many times by the + // alluxio.master.scheduler.Scheduler L239 List<WorkerInfo> workerInfoList = new ArrayList<>(mWorkers.size()); for (MasterWorkerInfo worker : mWorkers) { // extractWorkerInfo handles the locking internally workerInfoList.add(extractWorkerInfo(worker, - GetWorkerReportOptions.WorkerInfoField.ALL, true)); + GetWorkerReportOptions.WorkerInfoField.ALL, WorkerState.LIVE)); } return workerInfoList; } @@ -601,18 +697,36 @@ public List<WorkerInfo> getLostWorkersInfoList() throws UnavailableException { for (MasterWorkerInfo worker : mLostWorkers) { // extractWorkerInfo handles the locking internally workerInfoList.add(extractWorkerInfo(worker, - GetWorkerReportOptions.WorkerInfoField.ALL, false)); + GetWorkerReportOptions.WorkerInfoField.ALL, WorkerState.LOST)); } workerInfoList.sort(new WorkerInfo.LastContactSecComparator()); return workerInfoList; } @Override - public void removeDecommissionedWorker(long workerId) throws NotFoundException { - MasterWorkerInfo worker = getWorker(workerId); - Preconditions.checkNotNull(mDecommissionedWorkers - .getFirstByField(ADDRESS_INDEX, worker.getWorkerAddress())); - processFreedWorker(worker); + public void removeDisabledWorker(RemoveDisabledWorkerPOptions requestOptions) + throws NotFoundException { + if (mStandbyMasterRpcEnabled && mPrimarySelector.getStateUnsafe() == NodeState.STANDBY) { + throw new UnavailableRuntimeException( + "RemoveDisabledWorker operation is not supported on standby masters"); + } + String workerHostName = requestOptions.getWorkerHostname(); + long workerWebPort =
requestOptions.getWorkerWebPort(); + AtomicBoolean found = new AtomicBoolean(false); + mRejectWorkers.removeIf(entry -> { + if (entry.getHost().equals(workerHostName) && entry.getWebPort() == workerWebPort) { + LOG.info("Received admin command to re-accept worker {}. The worker should be " + + "accepted to the cluster when it registers again.", entry); + found.set(true); + return true; + } + return false; + }); + if (!found.get()) { + LOG.info("Received admin command to re-accept worker {} but the worker is " + + "not decommissioned. The worker will be able to register to the cluster normally. " + + "No further action is required.", workerHostName); + } } @Override @@ -679,15 +793,31 @@ public List<WorkerInfo> getWorkerReport(GetWorkerReportOptions options) + selectedDecommissionedWorkers.size()); for (MasterWorkerInfo worker : selectedLiveWorkers) { // extractWorkerInfo handles the locking internally - workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), true)); + if (mRejectWorkers.contains(worker.getWorkerAddress())) { + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), + WorkerState.DISABLED)); + } else { + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), WorkerState.LIVE)); + } } for (MasterWorkerInfo worker : selectedLostWorkers) { // extractWorkerInfo handles the locking internally - workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), false)); + if (mRejectWorkers.contains(worker.getWorkerAddress())) { + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), + WorkerState.DISABLED)); + } else { + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), WorkerState.LOST)); + } } for (MasterWorkerInfo worker : selectedDecommissionedWorkers) { // extractWorkerInfo handles the locking internally - workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), false)); + if (mRejectWorkers.contains(worker.getWorkerAddress())) { + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), + WorkerState.DISABLED)); + } else { + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), + WorkerState.DECOMMISSIONED)); + } } return workerInfoList; } @@ -696,9 +826,9 @@ public List<WorkerInfo> getWorkerReport(GetWorkerReportOptions options) * Locks the {@link MasterWorkerInfo} properly and convert it to a {@link WorkerInfo}. */ private WorkerInfo extractWorkerInfo(MasterWorkerInfo worker, - Set<GetWorkerReportOptions.WorkerInfoField> fieldRange, boolean isLiveWorker) { + Set<GetWorkerReportOptions.WorkerInfoField> fieldRange, WorkerState workerState) { try (LockResource r = worker.lockWorkerMetaForInfo(fieldRange)) { - return worker.generateWorkerInfo(fieldRange, workerState); + return worker.generateWorkerInfo(fieldRange, workerState); } } @@ -757,6 +887,9 @@ public void removeBlocks(Collection<Long> blockIds, boolean delete) throws Unava // with the block), the block will not be freed ever. The locking logic in // workerRegister should be changed to address this race condition.
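extractWorkerInfo above leans on the try-with-resources LockResource idiom, so a worker's metadata lock is released even if info generation throws; the loop that follows updates each worker's block lists under the same idiom. A generic sketch of the pattern with a stand-in class (not Alluxio's LockResource):

```java
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

/** Stand-in for a lock held for exactly one lexical scope. */
final class ScopedLock implements AutoCloseable {
  private final Lock mLock;

  ScopedLock(Lock lock) {
    mLock = lock;
    mLock.lock(); // acquired on construction
  }

  @Override
  public void close() {
    mLock.unlock(); // released when the try block exits, normally or exceptionally
  }
}

final class WorkerInfoReader {
  private final Lock mMetaLock = new ReentrantLock();

  String read() {
    try (ScopedLock ignored = new ScopedLock(mMetaLock)) {
      return "a consistent snapshot taken under the lock";
    }
  }
}
```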
for (long workerId : workerIds) { + // No need to update if the worker is lost or decommissioned + // When that lost/decommissioned worker registers again, those removed blocks + // will not be recognized, and the master will instruct the worker to remove them anyway MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker != null) { try (LockResource r = worker.lockWorkerMeta( @@ -770,9 +903,68 @@ public void removeBlocks(Collection blockIds, boolean delete) throws Unava } @Override - public void decommissionWorker(long workerId) - throws Exception { - //TODO(Tony Sun): added in another pr. + public boolean isRejected(WorkerNetAddress address) { + return mRejectWorkers.contains(address); + } + + @Override + public void decommissionWorker(DecommissionWorkerPOptions requestOptions) + throws NotFoundException { + String workerHostName = requestOptions.getWorkerHostname(); + long workerWebPort = requestOptions.getWorkerWebPort(); + boolean canRegisterAgain = requestOptions.getCanRegisterAgain(); + LOG.info("Decommissioning worker {}:{}", requestOptions.getWorkerHostname(), + requestOptions.getWorkerWebPort()); + for (MasterWorkerInfo workerInfo : mWorkers) { + WorkerNetAddress address = workerInfo.getWorkerAddress(); + if (workerHostName.equals(address.getHost()) && workerWebPort == address.getWebPort()) { + LOG.info("Found worker to decommission {}", workerInfo.getWorkerAddress()); + try (LockResource r = workerInfo.lockWorkerMeta( + EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { + processDecommissionedWorker(workerInfo, canRegisterAgain); + } + LOG.info("Worker {}@{}:{} has been added to the decommissionedWorkers set.", + workerInfo.getId(), workerHostName, workerWebPort); + return; + } + } + // The worker is not active, but it has been decommissioned from a previous call + for (MasterWorkerInfo workerInfo : mDecommissionedWorkers) { + WorkerNetAddress address = workerInfo.getWorkerAddress(); + if (workerHostName.equals(address.getHost()) && workerWebPort == address.getWebPort()) { + LOG.info("Worker {}@{}:{} has been decommissioned already", + workerInfo.getId(), workerHostName, workerWebPort); + return; + } + } + // If the worker is about to register, it may register back even if we decommission it + // here. So we let the admin wait until the worker is registered, to reduce the number of + // states to manage. 
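Taken together, the lookups in decommissionWorker walk a small state space: active and lost workers are moved to the decommissioned set, a worker still mid-registration is reported back to the admin to retry, an already-decommissioned worker is a no-op, and an unknown address is an error. The loops below implement exactly that order; a compact sketch of the dispatch, with the enum and messages invented for illustration:

```java
/** Invented buckets mirroring the lookup order in decommissionWorker. */
enum WorkerBucket { ACTIVE, DECOMMISSIONED, REGISTERING, LOST, UNKNOWN }

final class DecommissionDispatchSketch {
  static String decommission(WorkerBucket bucket) {
    switch (bucket) {
      case ACTIVE:
      case LOST:
        return "moved to the decommissioned set";
      case DECOMMISSIONED:
        return "no-op: already decommissioned";
      case REGISTERING:
        throw new IllegalStateException("wait for registration to finish, then retry");
      case UNKNOWN:
      default:
        throw new IllegalArgumentException("worker not found");
    }
  }
}
```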
+ for (MasterWorkerInfo workerInfo : mTempWorkers) { + WorkerNetAddress address = workerInfo.getWorkerAddress(); + if (workerHostName.equals(address.getHost()) && workerWebPort == address.getWebPort()) { + throw new NotFoundException(ExceptionMessage.WORKER_DECOMMISSIONED_BEFORE_REGISTER + .getMessage(workerHostName + ":" + workerWebPort)); + } + } + // If the worker is lost, we assume it is unlikely to come back + // immediately + for (MasterWorkerInfo workerInfo : mLostWorkers) { + WorkerNetAddress address = workerInfo.getWorkerAddress(); + if (workerHostName.equals(address.getHost()) && workerWebPort == address.getWebPort()) { + LOG.info("Found worker to decommission {} from lost workers", + workerInfo.getWorkerAddress()); + try (LockResource r = workerInfo.lockWorkerMeta( + EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { + processDecommissionedWorker(workerInfo, canRegisterAgain); + } + LOG.info("A lost worker {}@{}:{} has been added to the decommissionedWorkers set.", + workerInfo.getId(), workerHostName, workerWebPort); + return; + } + } + throw new NotFoundException(ExceptionMessage.WORKER_NOT_FOUND + .getMessage(workerHostName + ":" + workerWebPort)); } @Override @@ -897,11 +1089,36 @@ public void commitBlock(long workerId, long usedBytesOnTier, String tierAlias, throws NotFoundException, UnavailableException { LOG.debug("Commit block from workerId: {}, usedBytesOnTier: {}, blockId: {}, length: {}", workerId, usedBytesOnTier, blockId, length); - MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); // TODO(peis): Check lost workers as well. if (worker == null) { - throw new NotFoundException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId)); + /* + * If the worker is not recognized: + * 1. [Probably] The worker has been decommissioned and removed from the active worker list + * 2. [Possible] The worker has not finished its registration process. Maybe the master has + * failed over and the worker has not registered to this new primary. + * 3. [Unlikely] The worker does not belong to this cluster and has never registered. + * This is unlikely because the worker has an ID and it must be from some master. + * 4. [Unlikely] The worker is lost to the master. This is unlikely because the CommitBlock + * call is from the worker. More likely, the master was busy and did not + * handle the worker's heartbeat message for too long. + */ + worker = mDecommissionedWorkers.getFirstByField(ID_INDEX, workerId); + if (worker == null) { + throw new NotFoundException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId)); + } else { + WorkerNetAddress addr = worker.getWorkerAddress(); + LOG.info("Committing blocks from a decommissioned worker {}", + addr.getHost() + ":" + addr.getRpcPort()); + /* + * Even though the worker is now decommissioned, the master still accepts the block + * and updates the BlockLocation normally. + * Updating the BlockLocation is not strictly necessary, because when the worker + * registers again after restart, all locations will be rebuilt. + * But for simplicity, the location is still updated. + * A disabled worker is allowed to commit blocks, so ongoing operations will succeed.
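The comment above implies that decommissioning does not fail a worker's in-flight commits. A sketch of the intended admin flow (the blockMaster handle and the numeric values are hypothetical; the DecommissionWorkerPOptions setters mirror the getters used in this change):

DecommissionWorkerPOptions options = DecommissionWorkerPOptions.newBuilder()
    .setWorkerHostname("worker-1.example.com")
    .setWorkerWebPort(30000)
    .setCanRegisterAgain(true) // re-accept the worker when it registers again
    .build();
blockMaster.decommissionWorker(options);
// A block committed by the now-decommissioned worker is still accepted:
blockMaster.commitBlock(workerId, usedBytesOnTier, "MEM", "MEM", blockId, blockLength);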
+ */ + } } try (JournalContext journalContext = createJournalContext()) { @@ -917,17 +1134,29 @@ public void commitBlock(long workerId, long usedBytesOnTier, String tierAlias, block.get().getLength(), length); } else { mBlockMetaStore.putBlock(blockId, BlockMeta.newBuilder().setLength(length).build()); - BlockInfoEntry blockInfo = - BlockInfoEntry.newBuilder().setBlockId(blockId).setLength(length).build(); - journalContext.append(JournalEntry.newBuilder().setBlockInfo(blockInfo).build()); + BlockInfoEntry.Builder blockInfoBuilder = + BlockInfoEntry.newBuilder().setBlockId(blockId).setLength(length); + if (mWorkerRegisterToAllMasters) { + blockInfoBuilder + .setBlockId(blockId) + .setLength(length) + .setBlockLocation( + alluxio.grpc.BlockLocation.newBuilder() + .setWorkerId(workerId) + .setMediumType(mediumType) + .setTierAlias(tierAlias) + // Worker addresses are not journaled because adding a block location + // into the meta store only needs a worker id. + .build() + ); + } + journalContext.append( + JournalEntry.newBuilder().setBlockInfo(blockInfoBuilder.build()).build()); } } // Update the block metadata with the new worker location. - mBlockMetaStore.addLocation(blockId, BlockLocation.newBuilder() - .setWorkerId(workerId) - .setTier(tierAlias) - .setMediumType(mediumType) - .build()); + mBlockMetaStore.addLocation(blockId, BlockLocationUtils.getCached( + workerId, tierAlias, mediumType)); // This worker has this block, so it is no longer lost. mLostBlocks.remove(blockId); @@ -944,11 +1173,16 @@ public void commitBlock(long workerId, long usedBytesOnTier, String tierAlias, } @Override - public void commitBlockInUFS(long blockId, long length) throws UnavailableException { + public void commitBlockInUFS(long blockId, long length, JournalContext journalContext) { + commitBlockInUFS(blockId, length, journalContext, true); + } + + @Override + public void commitBlockInUFS( + long blockId, long length, JournalContext journalContext, boolean checkExists) { LOG.debug("Commit block in ufs. blockId: {}, length: {}", blockId, length); - try (JournalContext journalContext = createJournalContext(); - LockResource r = lockBlock(blockId)) { - if (mBlockMetaStore.getBlock(blockId).isPresent()) { + try (LockResource r = lockBlock(blockId)) { + if (checkExists && mBlockMetaStore.getBlock(blockId).isPresent()) { // Block metadata already exists, so do not need to create a new one. 
return; } @@ -1047,7 +1281,7 @@ private MasterWorkerInfo findUnregisteredWorker(long workerId) { * @param workerId the worker id to register */ @Nullable - private MasterWorkerInfo recordWorkerRegistration(long workerId) { + protected MasterWorkerInfo recordWorkerRegistration(long workerId) { for (IndexedSet<MasterWorkerInfo> workers: Arrays.asList(mTempWorkers, mLostWorkers, mDecommissionedWorkers)) { MasterWorkerInfo worker = workers.getFirstByField(ID_INDEX, workerId); @@ -1074,6 +1308,16 @@ private MasterWorkerInfo recordWorkerRegistration(long workerId) { @Override public long getWorkerId(WorkerNetAddress workerNetAddress) { + if (mStandbyMasterRpcEnabled && mPrimarySelector.getStateUnsafe() == NodeState.STANDBY) { + throw new UnavailableRuntimeException( + "GetWorkerId operation is not supported on standby masters"); + } + if (isRejected(workerNetAddress)) { + String msg = String.format(WORKER_DISABLED, workerNetAddress); + LOG.warn("{}", msg); + throw new UnavailableRuntimeException(msg); + } + LOG.info("Worker {} requesting an ID", workerNetAddress); MasterWorkerInfo existingWorker = mWorkers.getFirstByField(ADDRESS_INDEX, workerNetAddress); if (existingWorker != null) { // This worker address is already mapped to a worker id. @@ -1092,11 +1336,32 @@ public long getWorkerId(WorkerNetAddress workerNetAddress) { while (!mTempWorkers.add(new MasterWorkerInfo(workerId, workerNetAddress))) { workerId = IdUtils.getRandomNonNegativeLong(); } - LOG.info("getWorkerId(): WorkerNetAddress: {} id: {}", workerNetAddress, workerId); return workerId; } + @Override + public void notifyWorkerId(long workerId, WorkerNetAddress workerNetAddress) { + MasterWorkerInfo existingWorker = mWorkers.getFirstByField(ID_INDEX, workerId); + if (existingWorker != null) { + LOG.warn("Notified of worker {} that is already registered at {}", + workerId, existingWorker.getWorkerAddress()); + return; + } + + existingWorker = findUnregisteredWorker(workerId); + if (existingWorker != null) { + LOG.warn("Notified of worker {} that is already known but unregistered at {}", + workerId, existingWorker.getWorkerAddress()); + return; + } + + if (!mTempWorkers.add(new MasterWorkerInfo(workerId, workerNetAddress))) { + throw new RuntimeException("Duplicated worker ID for " + workerId + ": " + workerNetAddress); + } + LOG.info("notifyWorkerId(): WorkerNetAddress: {} id: {}", workerNetAddress, workerId); + } + @Override public Optional<RegisterLease> tryAcquireRegisterLease(GetRegisterLeasePRequest request) { return mRegisterLeaseManager.tryAcquireLease(request); @@ -1118,7 +1383,6 @@ public void workerRegister(long workerId, List<String> storageTiers, Map<Block.BlockLocation, List<Long>> currentBlocksOnLocation, Map<String, StorageList> lostStorage, RegisterWorkerPOptions options) throws NotFoundException { - MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker == null) { @@ -1128,8 +1392,13 @@ public void workerRegister(long workerId, List<String> storageTiers, if (worker == null) { throw new NotFoundException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId)); } + if (isRejected(worker.getWorkerAddress())) { + throw new UnavailableRuntimeException(String.format(WORKER_DISABLED, + worker.getWorkerAddress())); + } worker.setBuildVersion(options.getBuildVersion()); + worker.setNumVCpu(options.getNumVCpu()); // Gather all blocks on this worker.
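getWorkerId is now refused on standby masters, while notifyWorkerId lets a standby record an ID minted elsewhere instead of generating its own. A sketch of the intended division of labor when workers register to all masters (the master handles are hypothetical):

// On the primary: the worker obtains a fresh ID.
long workerId = primaryBlockMaster.getWorkerId(workerAddress);
// On each standby: the same ID is propagated rather than re-generated,
// so every master agrees on the (id, address) mapping.
standbyBlockMaster.notifyWorkerId(workerId, workerAddress);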
int totalSize = currentBlocksOnLocation.values().stream().mapToInt(List::size).sum(); @@ -1185,9 +1454,53 @@ public MasterWorkerInfo getWorker(long workerId) throws NotFoundException { return worker; } + private MasterWorkerInfo getLiveOrDecommissionedWorker(long workerId) { + MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); + if (worker != null) { + return worker; + } + // If not found in the decommissioned workers either, this returns null + return mDecommissionedWorkers.getFirstByField(ID_INDEX, workerId); + } + + private void processDecommissionedWorkerBlocks(MasterWorkerInfo workerInfo) { + processWorkerRemovedBlocks(workerInfo, workerInfo.getBlocks(), false); + } + + /** + * Updates the metadata for the specified decommissioned worker. + * @param worker the master worker info + * @param canRegisterAgain whether the worker may register again and rejoin the cluster + */ + private void processDecommissionedWorker(MasterWorkerInfo worker, boolean canRegisterAgain) { + WorkerNetAddress address = worker.getWorkerAddress(); + if (canRegisterAgain) { + LOG.info("Worker with address {} is decommissioned but will be accepted when it " + + "registers again.", address); + } else { + LOG.info("Worker with address {} will be rejected on register/heartbeat", address); + mRejectWorkers.add(address); + } + + mDecommissionedWorkers.add(worker); + // Remove worker from all other possible states + mWorkers.remove(worker); + mTempWorkers.remove(worker); + mLostWorkers.remove(worker); + // Invalidate cache to trigger new build of worker info list + mWorkerInfoCache.invalidate(WORKER_INFO_CACHE_KEY); + + WorkerNetAddress workerNetAddress = worker.getWorkerAddress(); + // TODO(bzheng888): Maybe need a new listener such as WorkerDecommissionListener. + for (Consumer<Address>
function : mWorkerLostListeners) { + function.accept(new Address(workerNetAddress.getHost(), workerNetAddress.getRpcPort())); + } + processDecommissionedWorkerBlocks(worker); + } + @Override public void workerRegisterStream(WorkerRegisterContext context, RegisterWorkerPRequest chunk, boolean isFirstMsg) { + // TODO(jiacheng): find a place to check the lease if (isFirstMsg) { workerRegisterStart(context, chunk); } else { @@ -1197,19 +1510,21 @@ public void workerRegisterStream(WorkerRegisterContext context, protected void workerRegisterStart(WorkerRegisterContext context, RegisterWorkerPRequest chunk) { + MasterWorkerInfo workerInfo = context.getWorkerInfo(); + Preconditions.checkState(workerInfo != null, + "No workerInfo metadata found in the WorkerRegisterContext!"); + if (isRejected(workerInfo.getWorkerAddress())) { + throw new UnavailableRuntimeException(String.format(WORKER_DISABLED, + workerInfo.getWorkerAddress())); + } final List<String> storageTiers = chunk.getStorageTiersList(); final Map<String, Long> totalBytesOnTiers = chunk.getTotalBytesOnTiersMap(); final Map<String, Long> usedBytesOnTiers = chunk.getUsedBytesOnTiersMap(); final Map<String, StorageList> lostStorage = chunk.getLostStorageMap(); - final Map<Block.BlockLocation, List<Long>> currentBlocksOnLocation = BlockMasterWorkerServiceHandler.reconstructBlocksOnLocationMap( chunk.getCurrentBlocksList(), context.getWorkerId()); RegisterWorkerPOptions options = chunk.getOptions(); - - MasterWorkerInfo workerInfo = context.getWorkerInfo(); - Preconditions.checkState(workerInfo != null, - "No workerInfo metadata found in the WorkerRegisterContext!"); mActiveRegisterContexts.put(workerInfo.getId(), context); // The workerInfo is locked so we can operate on its blocks without race conditions @@ -1223,6 +1538,7 @@ protected void workerRegisterStart(WorkerRegisterContext context, processWorkerOrphanedBlocks(workerInfo); workerInfo.addLostStorage(lostStorage); workerInfo.setBuildVersion(options.getBuildVersion()); + workerInfo.setNumVCpu(options.getNumVCpu()); // TODO(jiacheng): This block can be moved to a non-locked section if (options.getConfigsCount() > 0) { @@ -1241,7 +1557,10 @@ protected void workerRegisterBatch(WorkerRegisterContext context, RegisterWorker MasterWorkerInfo workerInfo = context.getWorkerInfo(); Preconditions.checkState(workerInfo != null, "No workerInfo metadata found in the WorkerRegisterContext!"); - + if (isRejected(workerInfo.getWorkerAddress())) { + throw new UnavailableRuntimeException(String.format(WORKER_DISABLED, + workerInfo.getWorkerAddress())); + } // Even if we add the BlockLocation before the workerInfo is fully registered, // it should be fine because the block can be read on this workerInfo. // If the stream fails in the middle, the blocks recorded on the MasterWorkerInfo @@ -1259,7 +1578,10 @@ public void workerRegisterFinish(WorkerRegisterContext context) { MasterWorkerInfo workerInfo = context.getWorkerInfo(); Preconditions.checkState(workerInfo != null, "No workerInfo metadata found in the WorkerRegisterContext!"); - + if (isRejected(workerInfo.getWorkerAddress())) { + throw new UnavailableRuntimeException(String.format(WORKER_DISABLED, + workerInfo.getWorkerAddress())); + } // Detect any lost blocks on this workerInfo. Set<Long> removedBlocks; if (workerInfo.mIsRegistered) { @@ -1298,15 +1620,42 @@ public Command workerHeartbeat(long workerId, Map<String, Long> capacityBytesOnTiers, List<Metric> metrics) { MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker == null) { + /* + * If the worker is not recognized: + * 1.
The worker never registered to the cluster, or the master has restarted or failed over + * 2. The worker has been decommissioned and removed from the active worker list + */ + worker = mDecommissionedWorkers.getFirstByField(ID_INDEX, workerId); + if (worker != null) { + WorkerNetAddress workerAddr = worker.getWorkerAddress(); + if (isRejected(worker.getWorkerAddress())) { + LOG.info("Received heartbeat from a disabled worker {}", + workerAddr.getHost() + ":" + workerAddr.getRpcPort()); + return Command.newBuilder().setCommandType(CommandType.Disabled).build(); + } + LOG.info("Received heartbeat from a decommissioned worker {}", + workerAddr.getHost() + ":" + workerAddr.getRpcPort()); + return Command.newBuilder().setCommandType(CommandType.Decommissioned).build(); + } LOG.warn("Could not find worker id: {} for heartbeat.", workerId); return Command.newBuilder().setCommandType(CommandType.Register).build(); } + if (isRejected(worker.getWorkerAddress())) { + throw new UnavailableRuntimeException(String.format(WORKER_DISABLED, + worker.getWorkerAddress())); + } // Update the TS before the heartbeat so even if the worker heartbeat processing // is time-consuming or triggers GC, the worker does not get marked as lost // by the LostWorkerDetectionHeartbeatExecutor worker.updateLastUpdatedTimeMs(); + if (mWorkerRegisterToAllMasters && mPrimarySelector.getStateUnsafe() == NodeState.STANDBY) { + waitBlockIdPresent( + addedBlocks.values().stream().flatMap(Collection::stream) + .collect(Collectors.toList()), workerId); + } + // The address is final, no need for locking processWorkerMetrics(worker.getWorkerAddress().getHost(), metrics); @@ -1327,7 +1676,7 @@ public Command workerHeartbeat(long workerId, Map<String, Long> capacityBytesOnTiers, processWorkerRemovedBlocks(worker, removedBlockIds, false); processWorkerAddedBlocks(worker, addedBlocks); Set<Long> toRemoveBlocks = worker.getToRemoveBlocks(); - if (toRemoveBlocks.isEmpty()) { + if (toRemoveBlocks.isEmpty() || mPrimarySelector.getStateUnsafe() == NodeState.STANDBY) { workerCommand = Command.newBuilder().setCommandType(CommandType.Nothing).build(); } else { workerCommand = Command.newBuilder().setCommandType(CommandType.Free) @@ -1344,6 +1693,47 @@ public Command workerHeartbeat(long workerId, Map<String, Long> capacityBytesOnTiers, return workerCommand; } + /** + * Waits for the given block IDs to be present in the block meta store. + * If workers register to standby masters, when a block is created, + * heartbeats might come before the standby applies the journal. + * To prevent this, we make a best-effort wait before ignoring unknown block IDs. + */ + private void waitBlockIdPresent(Collection<Long> blockIds, long workerId) { + final List<Long> blockIdsToWait = new ArrayList<>(); + for (long addedBlockId : blockIds) { + if (!mBlockMetaStore.getBlock(addedBlockId).isPresent()) { + blockIdsToWait.add(addedBlockId); + } + } + try { + CommonUtils.waitFor( + "Wait for blocks being committed on master before adding block locations", + () -> { + for (long blockId: blockIdsToWait) { + if (!mBlockMetaStore.getBlock(blockId).isPresent()) { + return false; + } + } + return true; + }, + WaitForOptions.defaults().setInterval(200).setTimeoutMs(1000) + ); + } catch (InterruptedException | TimeoutException e) { + StringBuilder sb = new StringBuilder(); + sb.append("["); + for (long blockIdToWait : blockIdsToWait) { + if (!mBlockMetaStore.getBlock(blockIdToWait).isPresent()) { + sb.append(blockIdToWait); + sb.append(", "); + } + } + sb.append("]"); + LOG.warn("Adding block ids {} for worker {} but these blocks don't exist.
" + + "These blocks will be ignored", sb, workerId); + } + } + @Override public Clock getClock() { return mClock; @@ -1411,7 +1801,7 @@ private void processWorkerAddedBlocks(MasterWorkerInfo workerInfo, Preconditions.checkState(location.getWorkerId() == workerInfo.getId(), "BlockLocation has a different workerId %s from the request sender's workerId %s", location.getWorkerId(), workerInfo.getId()); - mBlockMetaStore.addLocation(blockId, location); + mBlockMetaStore.addLocation(blockId, BlockLocationUtils.getCached(location)); mLostBlocks.remove(blockId); } else { invalidBlockCount++; @@ -1512,6 +1902,9 @@ private Optional generateBlockInfo(long blockId) throws UnavailableEx List locations = new ArrayList<>(blockLocations.size()); for (BlockLocation location : blockLocations) { + // Decommissioned workers are not included in the available locations + // Note that this may introduce a short unavailability on the block, before + // this worker registers again (and wipes out the decommissioned state). MasterWorkerInfo workerInfo = mWorkers.getFirstByField(ID_INDEX, location.getWorkerId()); if (workerInfo != null) { @@ -1548,7 +1941,7 @@ public final class LostWorkerDetectionHeartbeatExecutor implements HeartbeatExec public LostWorkerDetectionHeartbeatExecutor() {} @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { long masterWorkerTimeoutMs = Configuration.getMs(PropertyKey.MASTER_WORKER_TIMEOUT_MS); long masterWorkerDeleteTimeoutMs = Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_DELETION_TIMEOUT_MS); @@ -1566,10 +1959,22 @@ public void heartbeat() { } for (MasterWorkerInfo worker : mLostWorkers) { try (LockResource r = worker.lockWorkerMeta( - EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { + EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { final long lastUpdate = mClock.millis() - worker.getLastUpdatedTimeMs(); if ((lastUpdate - masterWorkerTimeoutMs) > masterWorkerDeleteTimeoutMs) { - LOG.error("The worker {}({}) timed out after {}ms without a heartbeat! " + LOG.error("The lost worker {}({}) timed out after {}ms without a heartbeat! " + + "Master will forget about this worker.", worker.getId(), + worker.getWorkerAddress(), lastUpdate); + deleteWorkerMetadata(worker); + } + } + } + for (MasterWorkerInfo worker : mDecommissionedWorkers) { + try (LockResource r = worker.lockWorkerMeta( + EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { + final long lastUpdate = mClock.millis() - worker.getLastUpdatedTimeMs(); + if ((lastUpdate - masterWorkerTimeoutMs) > masterWorkerDeleteTimeoutMs) { + LOG.error("The decommissioned worker {}({}) timed out after {}ms without a heartbeat! " + "Master will forget about this worker.", worker.getId(), worker.getWorkerAddress(), lastUpdate); deleteWorkerMetadata(worker); @@ -1609,6 +2014,10 @@ public void forgetAllWorkers() { private void processLostWorker(MasterWorkerInfo worker) { mLostWorkers.add(worker); mWorkers.remove(worker); + // Invalidate cache to trigger new build of worker info list + mWorkerInfoCache.invalidate(WORKER_INFO_CACHE_KEY); + // If a worker is gone before registering, avoid it getting stuck in mTempWorker forever + mTempWorkers.remove(worker); WorkerNetAddress workerAddress = worker.getWorkerAddress(); for (Consumer
function : mWorkerLostListeners) { function.accept(new Address(workerAddress.getHost(), workerAddress.getRpcPort())); @@ -1617,22 +2026,21 @@ private void processLostWorker(MasterWorkerInfo worker) { // mark these blocks to-remove from the worker. // So if the worker comes back again the blocks are kept. processWorkerRemovedBlocks(worker, worker.getBlocks(), false); + BlockLocationUtils.evictByWorkerId(worker.getId()); } private void deleteWorkerMetadata(MasterWorkerInfo worker) { mWorkers.remove(worker); mLostWorkers.remove(worker); + // If a worker is gone before registering, avoid it getting stuck in mTempWorkers forever mTempWorkers.remove(worker); + mDecommissionedWorkers.remove(worker); WorkerNetAddress workerAddress = worker.getWorkerAddress(); for (Consumer<Address>
function : mWorkerDeleteListeners) { function.accept(new Address(workerAddress.getHost(), workerAddress.getRpcPort())); } } - private void processFreedWorker(MasterWorkerInfo worker) { - mDecommissionedWorkers.remove(worker); - } - LockResource lockBlock(long blockId) { return new LockResource(mBlockLocks.get(blockId)); } @@ -1739,8 +2147,19 @@ public static void registerGauges(final DefaultBlockMaster master) { master::getWorkerCount); MetricsSystem.registerGaugeIfAbsent(MetricKey.CLUSTER_LOST_WORKERS.getName(), master::getLostWorkerCount); + + MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_CACHED_BLOCK_LOCATIONS.getName(), + BlockLocationUtils::getCachedBlockLocationSize); } private Metrics() {} // prevent instantiation } + + /** + * @return the block meta store + */ + @VisibleForTesting + public BlockMetaStore getBlockMetaStore() { + return mBlockMetaStore; + } } diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMasterContainerIdJournaled.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMasterContainerIdJournaled.java new file mode 100644 index 000000000000..363e7c510883 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMasterContainerIdJournaled.java @@ -0,0 +1,25 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.block; + +import alluxio.master.journal.SingleEntryJournaled; +import alluxio.master.journal.checkpoint.CheckpointName; + +/** + * Writes a single journal entry, essential to the DefaultBlockMaster, to a checkpoint. + */ +public class DefaultBlockMasterContainerIdJournaled extends SingleEntryJournaled { + @Override + public CheckpointName getCheckpointName() { + return CheckpointName.BLOCK_MASTER_CONTAINER_ID; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/block/RegisterStreamObserver.java b/core/server/master/src/main/java/alluxio/master/block/RegisterStreamObserver.java index 30ff4104cf98..03b45d98b849 100644 --- a/core/server/master/src/main/java/alluxio/master/block/RegisterStreamObserver.java +++ b/core/server/master/src/main/java/alluxio/master/block/RegisterStreamObserver.java @@ -12,6 +12,7 @@ package alluxio.master.block; import alluxio.RpcUtils; +import alluxio.annotation.SuppressFBWarnings; import alluxio.conf.PropertyKey; import alluxio.exception.status.DeadlineExceededException; import alluxio.grpc.GrpcExceptionUtils; @@ -25,6 +26,7 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.concurrent.GuardedBy; /** * This class handles the master side logic of the register stream.
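Several call sites in this diff replace ad-hoc BlockLocation construction with BlockLocationUtils.getCached, expose the cache size through the MASTER_CACHED_BLOCK_LOCATIONS gauge, and evict by worker ID when a worker is lost. BlockLocationUtils itself is not in this excerpt; a minimal sketch of such an interning cache (structure and field names are assumptions, relying on protobuf value equality of BlockLocation):

public final class BlockLocationUtils {
  // One canonical BlockLocation per (workerId, tier, medium) triple. Block counts far
  // exceed worker-tier combinations, so interning avoids many identical objects.
  private static final java.util.concurrent.ConcurrentHashMap<BlockLocation, BlockLocation>
      CACHE = new java.util.concurrent.ConcurrentHashMap<>();

  public static BlockLocation getCached(long workerId, String tierAlias, String mediumType) {
    return getCached(BlockLocation.newBuilder()
        .setWorkerId(workerId).setTier(tierAlias).setMediumType(mediumType).build());
  }

  public static BlockLocation getCached(BlockLocation location) {
    return CACHE.computeIfAbsent(location, k -> k);
  }

  public static void evictByWorkerId(long workerId) {
    // Drop entries for a worker the master has forgotten.
    CACHE.keySet().removeIf(location -> location.getWorkerId() == workerId);
  }

  public static int getCachedBlockLocationSize() {
    return CACHE.size();
  }

  private BlockLocationUtils() {} // utility class
}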
@@ -36,7 +38,11 @@ public class RegisterStreamObserver implements StreamObserver<RegisterWorkerPRequest> { private static final Logger LOG = LoggerFactory.getLogger(RegisterStreamObserver.class); - private WorkerRegisterContext mContext; + @GuardedBy("this") + @SuppressFBWarnings(value = "IS_FIELD_NOT_GUARDED") + // Context is initialized on the 1st request so later requests are guaranteed to see the context + // Locking is applied on init and cleanup + private volatile WorkerRegisterContext mContext; private final BlockMaster mBlockMaster; // Used to send responses to the worker private final StreamObserver<RegisterWorkerPResponse> mMasterResponseObserver; diff --git a/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java b/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java index b8bde41ddd38..ea5204075226 100644 --- a/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java +++ b/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java @@ -17,6 +17,7 @@ import alluxio.client.block.options.GetWorkerReportOptions.WorkerInfoField; import alluxio.grpc.BuildVersion; import alluxio.grpc.StorageList; +import alluxio.master.WorkerState; import alluxio.master.block.DefaultBlockMaster; import alluxio.resource.LockResource; import alluxio.util.CommonUtils; @@ -38,6 +39,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReadWriteLock; @@ -111,24 +113,25 @@ * and block removal/commit. * 2. In {@link alluxio.master.block.WorkerRegisterContext}, * write locks are held throughout the lifecycle. - * 3. In {@link DefaultBlockMaster.LostWorkerDetectionHeartbeatExecutor#heartbeat()} + * 3. In {@link DefaultBlockMaster.LostWorkerDetectionHeartbeatExecutor#heartbeat(long)} */ @NotThreadSafe public final class MasterWorkerInfo { private static final Logger LOG = LoggerFactory.getLogger(MasterWorkerInfo.class); - private static final String LIVE_WORKER_STATE = "In Service"; - private static final String LOST_WORKER_STATE = "Out of Service"; private static final EnumSet<WorkerInfoField> USAGE_INFO_FIELDS = EnumSet.of(WorkerInfoField.WORKER_CAPACITY_BYTES, WorkerInfoField.WORKER_CAPACITY_BYTES_ON_TIERS, WorkerInfoField.WORKER_USED_BYTES, - WorkerInfoField.WORKER_USED_BYTES_ON_TIERS); + WorkerInfoField.WORKER_USED_BYTES_ON_TIERS, + WorkerInfoField.NUM_VCPU); /** Worker's last updated time in ms. */ private final AtomicLong mLastUpdatedTimeMs; /** Worker's build version (including version and revision). */ private final AtomicReference<BuildVersion> mBuildVersion; + /** Worker's number of available processors. */ + private final AtomicInteger mNumVCpu; /** Worker metadata, this field is thread safe. */ private final StaticWorkerMeta mMeta; @@ -169,6 +172,7 @@ public MasterWorkerInfo(long id, WorkerNetAddress address) { mToRemoveBlocks = new LongOpenHashSet(); mLastUpdatedTimeMs = new AtomicLong(CommonUtils.getCurrentMs()); mBuildVersion = new AtomicReference<>(BuildVersion.getDefaultInstance()); + mNumVCpu = new AtomicInteger(); // Init all locks mStatusLock = new StampedLock().asReadWriteLock(); @@ -301,10 +305,10 @@ public void addLostStorage(Map<String, StorageList> lostStorage) { * The required locks will be determined internally based on the fields.
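For reference, a caller-side sketch of the reworked generateWorkerInfo API described below (the field selection is illustrative):

// STATE is now rendered from the WorkerState enum instead of a live/lost boolean.
WorkerInfo info = workerInfo.generateWorkerInfo(
    EnumSet.of(WorkerInfoField.ID, WorkerInfoField.STATE, WorkerInfoField.NUM_VCPU),
    WorkerState.LIVE);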
* * @param fieldRange the client selected fields - * @param isLiveWorker the worker is live or not + * @param workerState the worker state * @return generated worker information */ - public WorkerInfo generateWorkerInfo(Set<WorkerInfoField> fieldRange, boolean isLiveWorker) { + public WorkerInfo generateWorkerInfo(Set<WorkerInfoField> fieldRange, WorkerState workerState) { WorkerInfo info = new WorkerInfo(); for (WorkerInfoField field : fieldRange) { switch (field) { @@ -331,11 +335,7 @@ public WorkerInfo generateWorkerInfo(Set<WorkerInfoField> fieldRange, boolean isLiveWorker) info.setStartTimeMs(mMeta.mStartTimeMs); break; case STATE: - if (isLiveWorker) { - info.setState(LIVE_WORKER_STATE); - } else { - info.setState(LOST_WORKER_STATE); - } + info.setState(workerState.toString()); break; case WORKER_USED_BYTES: info.setUsedBytes(mUsage.mUsedBytes); @@ -348,6 +348,9 @@ public WorkerInfo generateWorkerInfo(Set<WorkerInfoField> fieldRange, boolean isLiveWorker) info.setVersion(v.getVersion()); info.setRevision(v.getRevision()); break; + case NUM_VCPU: + info.setNumVCpu(mNumVCpu.get()); + break; default: LOG.warn("Unrecognized worker info field: " + field); } @@ -542,7 +545,9 @@ public String toString() { .add("blocks", LOG.isDebugEnabled() ? mBlocks : CommonUtils.summarizeCollection(mBlocks)) .add("lostStorage", mUsage.mLostStorage) .add("version", buildVersion.getVersion()) - .add("revision", buildVersion.getRevision()).toString(); + .add("revision", buildVersion.getRevision()) + .add("numVCpu", mNumVCpu) + .toString(); } /** @@ -730,4 +735,22 @@ public void setBuildVersion(BuildVersion buildVersion) { public BuildVersion getBuildVersion() { return mBuildVersion.get(); } + + /** + * Sets the number of available processors of the worker. + * + * @param numVCpu the number of available processors + */ + public void setNumVCpu(int numVCpu) { + mNumVCpu.set(numVCpu); + } + + /** + * Gets the number of available processors on the worker.
+ * + * @return the number of available processors + */ + public int getNumVCpu() { + return mNumVCpu.get(); + } } diff --git a/core/server/master/src/main/java/alluxio/master/file/BlockIntegrityChecker.java b/core/server/master/src/main/java/alluxio/master/file/BlockIntegrityChecker.java index 24334a592eb0..46370d2f91f2 100644 --- a/core/server/master/src/main/java/alluxio/master/file/BlockIntegrityChecker.java +++ b/core/server/master/src/main/java/alluxio/master/file/BlockIntegrityChecker.java @@ -39,7 +39,7 @@ public BlockIntegrityChecker(FileSystemMaster fsm) { } @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { try { mFileSystemMaster.validateInodeBlocks(mRepair); } catch (Exception e) { diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index fe2a2e92324a..ae5a931b77db 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -20,6 +20,7 @@ import alluxio.ClientContext; import alluxio.Constants; import alluxio.Server; +import alluxio.client.file.FileSystemContext; import alluxio.client.job.JobMasterClient; import alluxio.client.job.JobMasterClientPool; import alluxio.clock.SystemClock; @@ -27,8 +28,6 @@ import alluxio.collections.PrefixList; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; -import alluxio.conf.Reconfigurable; -import alluxio.conf.ReconfigurableRegistry; import alluxio.exception.AccessControlException; import alluxio.exception.AlluxioException; import alluxio.exception.BlockInfoException; @@ -41,6 +40,7 @@ import alluxio.exception.InvalidFileSizeException; import alluxio.exception.InvalidPathException; import alluxio.exception.UnexpectedAlluxioException; +import alluxio.exception.runtime.NotFoundRuntimeException; import alluxio.exception.status.FailedPreconditionException; import alluxio.exception.status.InvalidArgumentException; import alluxio.exception.status.NotFoundException; @@ -48,9 +48,11 @@ import alluxio.exception.status.ResourceExhaustedException; import alluxio.exception.status.UnavailableException; import alluxio.file.options.DescendantType; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.DeletePOptions; import alluxio.grpc.FileSystemMasterCommonPOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.GrpcService; import alluxio.grpc.GrpcUtils; import alluxio.grpc.LoadDescendantPType; @@ -60,7 +62,10 @@ import alluxio.grpc.ServiceType; import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.TtlAction; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatThread; import alluxio.job.plan.persist.PersistConfig; @@ -92,7 +97,10 @@ import alluxio.master.file.contexts.ScheduleAsyncPersistenceContext; import alluxio.master.file.contexts.SetAclContext; import alluxio.master.file.contexts.SetAttributeContext; +import alluxio.master.file.contexts.SyncMetadataContext; import alluxio.master.file.contexts.WorkerHeartbeatContext; +import alluxio.master.file.mdsync.DefaultSyncProcess; +import alluxio.master.file.mdsync.TaskGroup; import alluxio.master.file.meta.FileSystemMasterView; 
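The NUM_VCPU field is filled from RegisterWorkerPOptions during registration (see setNumVCpu above). On the worker side it would plausibly be populated as follows (a sketch; the worker-side change is outside this excerpt and buildVersion is a hypothetical local):

RegisterWorkerPOptions options = RegisterWorkerPOptions.newBuilder()
    .setBuildVersion(buildVersion)
    .setNumVCpu(Runtime.getRuntime().availableProcessors())
    .build();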
import alluxio.master.file.meta.Inode; import alluxio.master.file.meta.InodeDirectory; @@ -119,10 +127,14 @@ import alluxio.master.journal.JournaledGroup; import alluxio.master.journal.NoopJournalContext; import alluxio.master.journal.checkpoint.CheckpointName; +import alluxio.master.journal.ufs.UfsJournalSystem; import alluxio.master.metastore.DelegatingReadOnlyInodeStore; import alluxio.master.metastore.InodeStore; import alluxio.master.metastore.ReadOnlyInodeStore; import alluxio.master.metrics.TimeSeriesStore; +import alluxio.master.scheduler.DefaultWorkerProvider; +import alluxio.master.scheduler.JournaledJobMetaStore; +import alluxio.master.scheduler.Scheduler; import alluxio.metrics.Metric; import alluxio.metrics.MetricInfo; import alluxio.metrics.MetricKey; @@ -144,7 +156,7 @@ import alluxio.retry.RetryPolicy; import alluxio.security.authentication.AuthType; import alluxio.security.authentication.AuthenticatedClientUser; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.security.authorization.AclEntry; import alluxio.security.authorization.AclEntryType; import alluxio.security.authorization.Mode; @@ -211,6 +223,8 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.SortedMap; import java.util.Spliterators; @@ -222,6 +236,7 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -235,7 +250,7 @@ */ @NotThreadSafe // TODO(jiri): make thread-safe (c.f. ALLUXIO-1664) public class DefaultFileSystemMaster extends CoreMaster - implements FileSystemMaster, DelegatingJournaled, Reconfigurable { + implements FileSystemMaster, DelegatingJournaled { private static final Logger LOG = LoggerFactory.getLogger(DefaultFileSystemMaster.class); private static final Set> DEPS = ImmutableSet.of(BlockMaster.class); @@ -346,7 +361,7 @@ public class DefaultFileSystemMaster extends CoreMaster private final BlockMaster mBlockMaster; /** This manages the file system inode structure. This must be journaled. */ - private final InodeTree mInodeTree; + protected final InodeTree mInodeTree; /** Store for holding inodes. */ private final ReadOnlyInodeStore mInodeStore; @@ -396,15 +411,16 @@ public class DefaultFileSystemMaster extends CoreMaster private final ActiveSyncManager mSyncManager; /** Log writer for user access audit log. */ - private AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter; + protected volatile AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter; /** Stores the time series for various metrics which are exposed in the UI. */ private final TimeSeriesStore mTimeSeriesStore; - private final AccessTimeUpdater mAccessTimeUpdater; + @Nullable private final AccessTimeUpdater mAccessTimeUpdater; /** Used to check pending/running backup from RPCs. 
*/ - private final CallTracker mStateLockCallTracker; + protected final CallTracker mStateLockCallTracker; + private final Scheduler mScheduler; final Clock mClock; @@ -442,6 +458,8 @@ public class DefaultFileSystemMaster extends CoreMaster ThreadFactoryUtils.build("alluxio-ufs-active-sync-%d", false)); private HeartbeatThread mReplicationCheckHeartbeatThread; + private final DefaultSyncProcess mDefaultSyncProcess; + /** * Creates a new instance of {@link DefaultFileSystemMaster}. * @@ -503,11 +521,19 @@ public Type getType() { mUfsBlockLocationCache = UfsBlockLocationCache.Factory.create(mMountTable); mSyncManager = new ActiveSyncManager(mMountTable, this); mTimeSeriesStore = new TimeSeriesStore(); - mAccessTimeUpdater = new AccessTimeUpdater(this, mInodeTree, masterContext.getJournalSystem()); + mAccessTimeUpdater = + Configuration.getBoolean(PropertyKey.MASTER_FILE_ACCESS_TIME_UPDATER_ENABLED) + ? new AccessTimeUpdater( + this, mInodeTree, masterContext.getJournalSystem()) : null; // Sync executors should allow core threads to time out mSyncPrefetchExecutor.allowCoreThreadTimeOut(true); mSyncMetadataExecutor.allowCoreThreadTimeOut(true); mActiveSyncMetadataExecutor.allowCoreThreadTimeOut(true); + FileSystemContext schedulerFsContext = FileSystemContext.create(); + JournaledJobMetaStore jobMetaStore = new JournaledJobMetaStore(this); + mScheduler = new Scheduler(new DefaultWorkerProvider(this, schedulerFsContext), jobMetaStore); + mDefaultSyncProcess = createSyncProcess( + mInodeStore, mMountTable, mInodeTree, getSyncPathCache()); // The mount table should come after the inode tree because restoring the mount table requires // that the inode tree is already restored. @@ -518,6 +544,7 @@ public Type getType() { add(mMountTable); add(mUfsManager); add(mSyncManager); + add(jobMetaStore); } }; mJournaledGroup = new JournaledGroup(journaledComponents, CheckpointName.FILE_SYSTEM_MASTER); @@ -531,6 +558,10 @@ public Type getType() { MetricsSystem.registerCachedGaugeIfAbsent( MetricsSystem.getMetricName(MetricKey.MASTER_METADATA_SYNC_EXECUTOR_QUEUE_SIZE.getName()), () -> mSyncMetadataExecutor.getQueue().size(), 2, TimeUnit.SECONDS); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_AUDIT_LOG_ENTRIES_SIZE.getName(), + () -> mAsyncAuditLogWriter != null + ? 
mAsyncAuditLogWriter.getAuditLogEntriesSize() : -1); } private static MountInfo getRootMountInfo(MasterUfsManager ufsManager) { @@ -561,11 +592,14 @@ private static MountInfo getRootMountInfo(MasterUfsManager ufsManager) { public Map<ServiceType, GrpcService> getServices() { Map<ServiceType, GrpcService> services = new HashMap<>(); services.put(ServiceType.FILE_SYSTEM_MASTER_CLIENT_SERVICE, new GrpcService(ServerInterceptors - .intercept(new FileSystemMasterClientServiceHandler(this), new ClientIpAddressInjector()))); - services.put(ServiceType.FILE_SYSTEM_MASTER_JOB_SERVICE, - new GrpcService(new FileSystemMasterJobServiceHandler(this))); - services.put(ServiceType.FILE_SYSTEM_MASTER_WORKER_SERVICE, - new GrpcService(new FileSystemMasterWorkerServiceHandler(this))); + .intercept(new FileSystemMasterClientServiceHandler(this, mScheduler), + new ClientContextServerInjector()))); + services.put(ServiceType.FILE_SYSTEM_MASTER_JOB_SERVICE, new GrpcService(ServerInterceptors + .intercept(new FileSystemMasterJobServiceHandler(this), + new ClientContextServerInjector()))); + services.put(ServiceType.FILE_SYSTEM_MASTER_WORKER_SERVICE, new GrpcService(ServerInterceptors + .intercept(new FileSystemMasterWorkerServiceHandler(this), + new ClientContextServerInjector()))); return services; } @@ -586,8 +620,21 @@ public Journaled getDelegate() { @Override public JournalContext createJournalContext() throws UnavailableException { + return createJournalContext(false); + } + + /** + * Creates a journal context. + * @param useMergeJournalContext if set to true, a journal context that merges journal entries + * and holds them until the context is closed will be returned when possible; if set to + * false, a normal journal context will be returned + * @return the journal context + */ + @VisibleForTesting + JournalContext createJournalContext(boolean useMergeJournalContext) + throws UnavailableException { JournalContext context = super.createJournalContext(); - if (!mMergeInodeJournals) { + if (!(mMergeInodeJournals && useMergeJournalContext)) { return context; } return new FileSystemMergeJournalContext( @@ -688,31 +735,36 @@ public void start(Boolean isPrimary) throws IOException { if (blockIntegrityCheckInterval > 0) { // negative or zero interval implies disabled getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_BLOCK_INTEGRITY_CHECK, - new BlockIntegrityChecker(this), blockIntegrityCheckInterval, + new BlockIntegrityChecker(this), () -> + new FixedIntervalSupplier(Configuration.getMs( + PropertyKey.MASTER_PERIODIC_BLOCK_INTEGRITY_CHECK_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); } getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_TTL_CHECK, new InodeTtlChecker(this, mInodeTree), - Configuration.getMs(PropertyKey.MASTER_TTL_CHECKER_INTERVAL_MS), + () -> new FixedIntervalSupplier( Configuration.getMs(PropertyKey.MASTER_TTL_CHECKER_INTERVAL_MS)), Configuration.global(), mMasterContext.getUserState())); getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_LOST_FILES_DETECTION, new LostFileDetector(this, mBlockMaster, mInodeTree), - Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_FILE_DETECTION_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_FILE_DETECTION_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); mReplicationCheckHeartbeatThread = new HeartbeatThread( HeartbeatContext.MASTER_REPLICATION_CHECK, new alluxio.master.file.replication.ReplicationChecker(mInodeTree, mBlockMaster, mSafeModeManager,
mJobMasterClientPool), - Configuration.getMs(PropertyKey.MASTER_REPLICATION_CHECK_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_REPLICATION_CHECK_INTERVAL_MS)), Configuration.global(), mMasterContext.getUserState()); - ReconfigurableRegistry.register(this); getExecutorService().submit(mReplicationCheckHeartbeatThread); getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_PERSISTENCE_SCHEDULER, new PersistenceScheduler(), - Configuration.getMs(PropertyKey.MASTER_PERSISTENCE_SCHEDULER_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_PERSISTENCE_SCHEDULER_INTERVAL_MS)), Configuration.global(), mMasterContext.getUserState())); mPersistCheckerPool = new java.util.concurrent.ThreadPoolExecutor(PERSIST_CHECKER_POOL_THREADS, @@ -723,40 +775,49 @@ public void start(Boolean isPrimary) throws IOException { getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_PERSISTENCE_CHECKER, new PersistenceChecker(), - Configuration.getMs(PropertyKey.MASTER_PERSISTENCE_CHECKER_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_PERSISTENCE_CHECKER_INTERVAL_MS)), Configuration.global(), mMasterContext.getUserState())); getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_METRICS_TIME_SERIES, new TimeSeriesRecorder(), - Configuration.getMs(PropertyKey.MASTER_METRICS_TIME_SERIES_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_METRICS_TIME_SERIES_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); - if (Configuration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) { - mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("AUDIT_LOG"); - mAsyncAuditLogWriter.start(); - MetricsSystem.registerGaugeIfAbsent( - MetricKey.MASTER_AUDIT_LOG_ENTRIES_SIZE.getName(), - () -> mAsyncAuditLogWriter != null - ? mAsyncAuditLogWriter.getAuditLogEntriesSize() : -1); - } if (Configuration.getBoolean(PropertyKey.UNDERFS_CLEANUP_ENABLED)) { getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_UFS_CLEANUP, new UfsCleaner(this), - Configuration.getMs(PropertyKey.UNDERFS_CLEANUP_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.UNDERFS_CLEANUP_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); } - mAccessTimeUpdater.start(); + if (mAccessTimeUpdater != null) { + mAccessTimeUpdater.start(); + } mSyncManager.start(); + mScheduler.start(); } + /** + * The audit logger will be running all the time, and an operation checks whether + * to enable audit logs in {@link #createAuditContext}. So audit log can be turned on/off + * at runtime by updating the property key. 
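Because the writer thread now always runs and each operation consults the property inside createAuditContext, audit logging becomes a runtime toggle. A sketch, assuming the property can be updated on a live master:

// Entries start or stop flowing without restarting the writer thread.
Configuration.set(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED, true);
// ... later ...
Configuration.set(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED, false);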
+ */ + mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("AUDIT_LOG"); + mAsyncAuditLogWriter.start(); } @Override public void stop() throws IOException { + LOG.info("Next directory id before close: {}", mDirectoryIdGenerator.peekDirectoryId()); if (mAsyncAuditLogWriter != null) { mAsyncAuditLogWriter.stop(); mAsyncAuditLogWriter = null; } mSyncManager.stop(); - mAccessTimeUpdater.stop(); + if (mAccessTimeUpdater != null) { + mAccessTimeUpdater.stop(); + } + mScheduler.stop(); super.stop(); } @@ -788,7 +849,6 @@ public void close() throws IOException { Thread.currentThread().interrupt(); LOG.warn("Failed to wait for active sync executor to shut down."); } - ReconfigurableRegistry.unregister(this); } @Override @@ -903,8 +963,9 @@ public FileInfo getFileInfo(AlluxioURI path, GetStatusContext context) FileSystemMasterCommonPOptions.newBuilder() .setTtl(context.getOptions().getCommonOptions().getTtl()) .setTtlAction(context.getOptions().getCommonOptions().getTtlAction()))); - /* - See the comments in #getFileIdInternal for an explanation on why the loop here is required. + /** + * See the comments in {@link #getFileIdInternal(AlluxioURI, boolean)} for an explanation + * on why the loop here is required. */ boolean run = true; boolean loadMetadata = false; @@ -950,8 +1011,7 @@ public FileInfo getFileInfo(AlluxioURI path, GetStatusContext context) Mode.Bits accessMode = Mode.Bits.fromProto(context.getOptions().getAccessMode()); if (context.getOptions().getUpdateTimestamps() && context.getOptions().hasAccessMode() && (accessMode.imply(Mode.Bits.READ) || accessMode.imply(Mode.Bits.WRITE))) { - mAccessTimeUpdater.updateAccessTime(rpcContext.getJournalContext(), - inodePath.getInode(), opTimeMs); + updateAccessTime(rpcContext, inodePath.getInode(), opTimeMs); } auditContext.setSrcInode(inodePath.getInode()).setSucceeded(true); ret = fileInfo; @@ -968,14 +1028,15 @@ public long getMountIdFromUfsPath(AlluxioURI ufsPath) { private FileInfo getFileInfoInternal(LockedInodePath inodePath) throws UnavailableException, FileDoesNotExistException { - return getFileInfoInternal(inodePath, null); + return getFileInfoInternal(inodePath, null, false); } /** * @param inodePath the {@link LockedInodePath} to get the {@link FileInfo} for * @return the {@link FileInfo} for the given inode */ - private FileInfo getFileInfoInternal(LockedInodePath inodePath, Counter counter) + private FileInfo getFileInfoInternal(LockedInodePath inodePath, Counter counter, + boolean excludeMountInfo) throws FileDoesNotExistException, UnavailableException { int inMemoryPercentage; int inAlluxioPercentage; @@ -995,7 +1056,7 @@ private FileInfo getFileInfoInternal(LockedInodePath inodePath, Counter counter) List fileBlockInfos = new ArrayList<>(blockInfos.size()); for (BlockInfo blockInfo : blockInfos) { - fileBlockInfos.add(generateFileBlockInfo(inodePath, blockInfo)); + fileBlockInfos.add(generateFileBlockInfo(inodePath, blockInfo, excludeMountInfo)); } fileInfo.setFileBlockInfos(fileBlockInfos); } @@ -1012,30 +1073,32 @@ private FileInfo getFileInfoInternal(LockedInodePath inodePath, Counter counter) mBlockMaster.removeBlocks(fileInfo.getBlockIds(), true); // Commit all the file blocks (without locations) so the metadata for the block exists. commitBlockInfosForFile( - fileInfo.getBlockIds(), fileInfo.getLength(), fileInfo.getBlockSizeBytes()); + fileInfo.getBlockIds(), fileInfo.getLength(), fileInfo.getBlockSizeBytes(), null); // Reset file-block-info list with the new list. 
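Call sites above now route through an updateAccessTime wrapper because mAccessTimeUpdater is null when MASTER_FILE_ACCESS_TIME_UPDATER_ENABLED is off. The wrapper's body is not shown in this excerpt; it plausibly reduces to a null guard:

private void updateAccessTime(RpcContext rpcContext, Inode inode, long opTimeMs) {
  // No-op when the access time updater is disabled by configuration.
  if (mAccessTimeUpdater != null) {
    mAccessTimeUpdater.updateAccessTime(rpcContext.getJournalContext(), inode, opTimeMs);
  }
}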
try { - fileInfo.setFileBlockInfos(getFileBlockInfoListInternal(inodePath)); + fileInfo.setFileBlockInfos(getFileBlockInfoListInternal(inodePath, excludeMountInfo)); } catch (InvalidPathException e) { throw new FileDoesNotExistException( String.format("Hydration failed for file: %s", inodePath.getUri()), e); } } fileInfo.setXAttr(inode.getXAttr()); - MountTable.Resolution resolution; - try { - resolution = mMountTable.resolve(uri); - } catch (InvalidPathException e) { - throw new FileDoesNotExistException(e.getMessage(), e); - } - AlluxioURI resolvedUri = resolution.getUri(); - fileInfo.setUfsPath(resolvedUri.toString()); - fileInfo.setMountId(resolution.getMountId()); - if (counter == null) { - Metrics.getUfsOpsSavedCounter(resolution.getUfsMountPointUri(), - Metrics.UFSOps.GET_FILE_INFO).inc(); - } else { - counter.inc(); + if (!excludeMountInfo) { + MountTable.Resolution resolution; + try { + resolution = mMountTable.resolve(uri); + } catch (InvalidPathException e) { + throw new FileDoesNotExistException(e.getMessage(), e); + } + AlluxioURI resolvedUri = resolution.getUri(); + fileInfo.setUfsPath(resolvedUri.toString()); + fileInfo.setMountId(resolution.getMountId()); + if (counter == null) { + Metrics.getUfsOpsSavedCounter(resolution.getUfsMountPointUri(), + Metrics.UFSOps.GET_FILE_INFO).inc(); + } else { + counter.inc(); + } } Metrics.FILE_INFOS_GOT.inc(); @@ -1058,7 +1121,9 @@ public void listStatus(AlluxioURI path, ListStatusContext context, Metrics.GET_FILE_INFO_OPS.inc(); LockingScheme lockingScheme = new LockingScheme(path, LockPattern.READ, false); boolean ufsAccessed = false; - try (RpcContext rpcContext = createRpcContext(context); + // List status may generate inode access time update journal entries. + // We want these entries added to the async writer immediately instead of being merged. + try (RpcContext rpcContext = createNonMergingJournalRpcContext(context); FileSystemMasterAuditContext auditContext = createAuditContext("listStatus", path, null, null)) { @@ -1070,8 +1135,9 @@ public void listStatus(AlluxioURI path, ListStatusContext context, context.getOptions().setLoadMetadataType(LoadMetadataPType.NEVER); ufsAccessed = true; } - /* - See the comments in #getFileIdInternal for an explanation on why the loop here is required. + /** + * See the comments in {@link #getFileIdInternal(AlluxioURI, boolean)} for an explanation + * on why the loop here is required.
*/ DescendantType loadDescendantType; if (context.getOptions().getLoadMetadataType() == LoadMetadataPType.NEVER) { @@ -1093,7 +1159,7 @@ public void listStatus(AlluxioURI path, ListStatusContext context, boolean run = true; while (run) { run = false; - if (loadMetadata) { + if (loadMetadata && !context.isDisableMetadataSync()) { loadMetadataIfNotExist(rpcContext, path, loadMetadataContext); ufsAccessed = true; } @@ -1111,12 +1177,12 @@ public void listStatus(AlluxioURI path, ListStatusContext context, auditContext.setAllowed(false); throw e; } - if (!loadMetadata) { + if (!loadMetadata && !context.isDisableMetadataSync()) { Inode inode; boolean isLoaded = true; if (inodePath.fullPathExists()) { inode = inodePath.getInode(); - if (inode.isDirectory() + if (inode.isDirectory() && !context.getOptions().getDisableAreDescendantsLoadedCheck() && context.getOptions().getLoadMetadataType() != LoadMetadataPType.ALWAYS) { InodeDirectory inodeDirectory = inode.asDirectory(); isLoaded = inodeDirectory.isDirectChildrenLoaded(); @@ -1141,13 +1207,15 @@ public void listStatus(AlluxioURI path, ListStatusContext context, ensureFullPathAndUpdateCache(inodePath); auditContext.setSrcInode(inodePath.getInode()); - MountTable.Resolution resolution; + MountTable.Resolution resolution = null; if (!context.getOptions().hasLoadMetadataOnly() || !context.getOptions().getLoadMetadataOnly()) { DescendantType descendantTypeForListStatus = (context.getOptions().getRecursive()) ? DescendantType.ALL : DescendantType.ONE; try { - resolution = mMountTable.resolve(path); + if (!context.getOptions().getExcludeMountInfo()) { + resolution = mMountTable.resolve(path); + } } catch (InvalidPathException e) { throw new FileDoesNotExistException(e.getMessage(), e); } @@ -1167,11 +1235,11 @@ public void listStatus(AlluxioURI path, ListStatusContext context, } // perform the listing listStatusInternal(context, rpcContext, inodePath, auditContext, - descendantTypeForListStatus, resultStream, 0, - Metrics.getUfsOpsSavedCounter(resolution.getUfsMountPointUri(), - Metrics.UFSOps.GET_FILE_INFO), + descendantTypeForListStatus, resultStream, 0, resolution == null ? null : + Metrics.getUfsOpsSavedCounter(resolution.getUfsMountPointUri(), + Metrics.UFSOps.GET_FILE_INFO), partialPathNames, prefixComponents); - if (!ufsAccessed) { + if (!ufsAccessed && resolution != null) { Metrics.getUfsOpsSavedCounter(resolution.getUfsMountPointUri(), Metrics.UFSOps.LIST_STATUS).inc(); } @@ -1214,7 +1282,7 @@ public List listStatus(AlluxioURI path, ListStatusContext context) private void listStatusInternal( ListStatusContext context, RpcContext rpcContext, LockedInodePath currInodePath, AuditContext auditContext, DescendantType descendantType, ResultStream resultStream, - int depth, Counter counter, List partialPath, + int depth, @Nullable Counter counter, List partialPath, List prefixComponents) throws FileDoesNotExistException, UnavailableException, AccessControlException, InvalidPathException { @@ -1223,6 +1291,7 @@ private void listStatusInternal( if (context.donePartialListing()) { return; } + // The item should be listed if: // 1. We are not doing a partial listing, or have reached the start of the partial listing // (partialPath is empty) @@ -1237,7 +1306,8 @@ private void listStatusInternal( // at this depth. 
if ((depth != 0 || inode.isFile()) && prefixComponents.size() <= depth) { if (context.listedItem()) { - resultStream.submit(getFileInfoInternal(currInodePath, counter)); + resultStream.submit(getFileInfoInternal(currInodePath, counter, + context.getOptions().getExcludeMountInfo())); } if (context.isDoneListing()) { return; @@ -1262,8 +1332,7 @@ private void listStatusInternal( // in the remaining recursive calls, so we set partialPath to the empty list partialPath = Collections.emptyList(); } - mAccessTimeUpdater.updateAccessTime(rpcContext.getJournalContext(), inode, - CommonUtils.getCurrentMs()); + updateAccessTime(rpcContext, inode, CommonUtils.getCurrentMs()); DescendantType nextDescendantType = (descendantType == DescendantType.ALL) ? DescendantType.ALL : DescendantType.NONE; try (CloseableIterator childrenIterator = getChildrenIterator( @@ -1286,7 +1355,7 @@ private void listStatusInternal( try (LockedInodePath childInodePath = currInodePath.lockChildByName( - childName, LockPattern.READ, childComponentsHint)) { + childName, LockPattern.READ, childComponentsHint, true)) { listStatusInternal(context, rpcContext, childInodePath, auditContext, nextDescendantType, resultStream, depth + 1, counter, partialPath, prefixComponents); @@ -1334,7 +1403,7 @@ private void checkLoadMetadataOptions(LoadMetadataPType loadMetadataType, Alluxi } } - private boolean areDescendantsLoaded(InodeDirectoryView inode) { + protected boolean areDescendantsLoaded(InodeDirectoryView inode) { if (!inode.isDirectChildrenLoaded()) { return false; } @@ -1357,7 +1426,7 @@ private boolean areDescendantsLoaded(InodeDirectoryView inode) { * * @param inodePath the path to ensure */ - private void ensureFullPathAndUpdateCache(LockedInodePath inodePath) + protected void ensureFullPathAndUpdateCache(LockedInodePath inodePath) throws InvalidPathException, FileDoesNotExistException { boolean exists = false; try { @@ -1455,8 +1524,9 @@ public boolean exists(AlluxioURI path, ExistsContext context) LoadMetadataPOptions.newBuilder() .setCommonOptions(context.getOptions().getCommonOptions()) .setLoadType(context.getOptions().getLoadMetadataType())); - /* - See the comments in #getFileIdInternal for an explanation on why the loop here is required. + /** + * See the comments in {@link #getFileIdInternal(AlluxioURI, boolean)} for an explanation + * on why the loop here is required. */ boolean run = true; boolean loadMetadata = false; @@ -1612,7 +1682,8 @@ public void completeFile(AlluxioURI path, CompleteFileContext context) UnavailableException { if (isOperationComplete(context)) { Metrics.COMPLETED_OPERATION_RETRIED_COUNT.inc(); - LOG.warn("A completed \"completeFile\" operation has been retried. {}", context); + LOG.warn("A completed \"completeFile\" operation has been retried. OperationContext={}", + context); return; } Metrics.COMPLETE_FILE_OPS.inc(); @@ -1638,7 +1709,7 @@ public void completeFile(AlluxioURI path, CompleteFileContext context) } // Even readonly mount points should be able to complete a file, for UFS reads in CACHE mode. completeFileInternal(rpcContext, inodePath, context); - // Schedule async persistence if requested. + // Inode completion check is skipped because we know the file we completed is complete. 
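For context, a client-side sketch of a complete-file request carrying the async-persist options that the branch below checks (option values are illustrative):

CompleteFilePOptions options = CompleteFilePOptions.newBuilder()
    .setUfsLength(fileLength) // hypothetical local holding the file length
    .setAsyncPersistOptions(ScheduleAsyncPersistencePOptions.getDefaultInstance())
    .build();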
if (context.getOptions().hasAsyncPersistOptions()) { scheduleAsyncPersistenceInternal(inodePath, ScheduleAsyncPersistenceContext .create(context.getOptions().getAsyncPersistOptionsBuilder()), rpcContext); @@ -1648,6 +1719,50 @@ public void completeFile(AlluxioURI path, CompleteFileContext context) } } + /** + * Creates a completed file for metadata sync. + * This method is more efficient than a combination of individual + * createFile() and completeFile() methods, with fewer journal entries generated and + * less frequent metadata store updates. + * @param rpcContext the rpc context for journaling + * @param inodePath the inode path + * @param createFileContext the create file context + * @param ufsStatus the ufs status, used to generate the fingerprint + * @return the path of inodes to the created node + */ + public List createCompleteFileInternalForMetadataSync( + RpcContext rpcContext, LockedInodePath inodePath, CreateFileContext createFileContext, + UfsFileStatus ufsStatus + ) + throws InvalidPathException, FileDoesNotExistException, FileAlreadyExistsException, + BlockInfoException, IOException { + long containerId = mBlockMaster.getNewContainerId(); + List blockIds = new ArrayList<>(); + + int sequenceNumber = 0; + long ufsLength = ufsStatus.getContentLength(); + long remainingBytes = ufsLength; + long blockSize = createFileContext.getOptions().getBlockSizeBytes(); + while (remainingBytes > 0) { + blockIds.add(BlockId.createBlockId(containerId, sequenceNumber)); + remainingBytes -= Math.min(remainingBytes, blockSize); + sequenceNumber++; + } + createFileContext.setCompleteFileInfo( + new CreateFileContext.CompleteFileInfo(containerId, ufsLength, blockIds) + ); + createFileContext.setMetadataLoad(true, false); + createFileContext.setFingerprint(getUfsFingerprint(inodePath.getUri(), ufsStatus, null)); + + // Ufs absent cache is updated in the metadata syncer when request processing is done, + // so ufs absent cache is not updated here. + List inodes = createFileInternal(rpcContext, inodePath, createFileContext, false); + + commitBlockInfosForFile(blockIds, ufsLength, blockSize, rpcContext.getJournalContext()); + mUfsAbsentPathCache.processExisting(inodePath.getUri()); + return inodes; + } + /** * Completes a file. After a file is completed, it cannot be written to. * @@ -1692,19 +1807,9 @@ void completeFileInternal(RpcContext rpcContext, LockedInodePath inodePath, String ufsFingerprint = Constants.INVALID_UFS_FINGERPRINT; if (fileInode.isPersisted()) { - UfsStatus ufsStatus = context.getUfsStatus(); - // Retrieve the UFS fingerprint for this file. - MountTable.Resolution resolution = mMountTable.resolve(inodePath.getUri()); - AlluxioURI resolvedUri = resolution.getUri(); - String ufsPath = resolvedUri.toString(); - try (CloseableResource ufsResource = resolution.acquireUfsResource()) { - UnderFileSystem ufs = ufsResource.get(); - if (ufsStatus == null) { - ufsFingerprint = ufs.getParsedFingerprint(ufsPath).serialize(); - } else { - ufsFingerprint = Fingerprint.create(ufs.getUnderFSType(), ufsStatus).serialize(); - } - } + String contentHash = context.getOptions().hasContentHash() + ?
context.getOptions().getContentHash() : null; + ufsFingerprint = getUfsFingerprint(inodePath.getUri(), context.getUfsStatus(), contentHash); } completeFileInternal(rpcContext, inodePath, length, context.getOperationTimeMs(), @@ -1754,7 +1859,8 @@ private void completeFileInternal(RpcContext rpcContext, LockedInodePath inodePa if (inode.isPersisted()) { // Commit all the file blocks (without locations) so the metadata for the block exists. - commitBlockInfosForFile(entry.getSetBlocksList(), length, inode.getBlockSizeBytes()); + commitBlockInfosForFile(entry.getSetBlocksList(), length, inode.getBlockSizeBytes(), + rpcContext.getJournalContext()); // The path exists in UFS, so it is no longer absent mUfsAbsentPathCache.processExisting(inodePath.getUri()); } @@ -1773,6 +1879,23 @@ private void completeFileInternal(RpcContext rpcContext, LockedInodePath inodePa Metrics.FILES_COMPLETED.inc(); } + String getUfsFingerprint( + AlluxioURI uri, @Nullable UfsStatus ufsStatus, @Nullable String contentHash) + throws InvalidPathException { + // Retrieve the UFS fingerprint for this file. + MountTable.Resolution resolution = mMountTable.resolve(uri); + AlluxioURI resolvedUri = resolution.getUri(); + String ufsPath = resolvedUri.toString(); + try (CloseableResource ufsResource = resolution.acquireUfsResource()) { + UnderFileSystem ufs = ufsResource.get(); + if (ufsStatus == null) { + return ufs.getParsedFingerprint(ufsPath, contentHash).serialize(); + } else { + return Fingerprint.create(ufs.getUnderFSType(), ufsStatus).serialize(); + } + } + } + /** * Queries InodeTree's operation cache and see if this operation has recently * been applied to its persistent state. @@ -1800,13 +1923,21 @@ private void cacheOperation(OperationContext opContext) { * @param blockIds the list of block ids * @param fileLength length of the file in bytes * @param blockSize the block size in bytes + * @param context the journal context, if null a new context will be created */ - private void commitBlockInfosForFile(List blockIds, long fileLength, long blockSize) - throws UnavailableException { + private void commitBlockInfosForFile(List blockIds, long fileLength, long blockSize, + @Nullable JournalContext context) throws UnavailableException { long currLength = fileLength; for (long blockId : blockIds) { long currentBlockSize = Math.min(currLength, blockSize); - mBlockMaster.commitBlockInUFS(blockId, currentBlockSize); + // if we are not using the UFS journal system, we can use the same journal context + // for the block info so that we do not have to create a new journal + // context and flush again + if (context != null && !(mJournalSystem instanceof UfsJournalSystem)) { + mBlockMaster.commitBlockInUFS(blockId, currentBlockSize, context, false); + } else { + mBlockMaster.commitBlockInUFS(blockId, currentBlockSize); + } currLength -= currentBlockSize; } } @@ -1817,7 +1948,8 @@ public FileInfo createFile(AlluxioURI path, CreateFileContext context) BlockInfoException, IOException, FileDoesNotExistException { if (isOperationComplete(context)) { Metrics.COMPLETED_OPERATION_RETRIED_COUNT.inc(); - LOG.warn("A completed \"createFile\" operation has been retried. {}", context); + LOG.warn("A completed \"createFile\" operation has been retried. 
OperationContext={}", + context); return getFileInfo(path, GetStatusContext.create(GetStatusPOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(-1)) @@ -1858,7 +1990,8 @@ public FileInfo createFile(AlluxioURI path, CreateFileContext context) // Check if ufs is writable checkUfsMode(path, OperationType.WRITE); } - createFileInternal(rpcContext, inodePath, context); + deleteFileIfOverwrite(rpcContext, inodePath, context); + createFileInternal(rpcContext, inodePath, context, true); auditContext.setSrcInode(inodePath.getInode()).setSucceeded(true); cacheOperation(context); return getFileInfoInternal(inodePath); @@ -1866,6 +1999,41 @@ public FileInfo createFile(AlluxioURI path, CreateFileContext context) } } + /** + * @param rpcContext the rpc context + * @param inodePath the path to be created + * @param context the method context + */ + private void deleteFileIfOverwrite(RpcContext rpcContext, LockedInodePath inodePath, + CreateFileContext context) + throws FileDoesNotExistException, IOException, InvalidPathException, + FileAlreadyExistsException { + if (inodePath.fullPathExists()) { + Inode currentInode = inodePath.getInode(); + if (!context.getOptions().hasOverwrite() || !context.getOptions().getOverwrite()) { + throw new FileAlreadyExistsException( + ExceptionMessage.CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE.getMessage( + inodePath.getUri())); + } + // if the fullpath is a file and the option is to overwrite, delete it + if (currentInode.isDirectory()) { + throw new FileAlreadyExistsException( + ExceptionMessage.CANNOT_OVERWRITE_DIRECTORY.getMessage(inodePath.getUri())); + } else { + try { + deleteInternal(rpcContext, inodePath, DeleteContext.mergeFrom( + DeletePOptions.newBuilder().setRecursive(true) + .setAlluxioOnly(!context.isPersisted())), true); + inodePath.removeLastInode(); + } catch (DirectoryNotEmptyException e) { + // Should not reach here + throw new InvalidPathException( + ExceptionMessage.CANNOT_OVERWRITE_DIRECTORY.getMessage(inodePath.getUri())); + } + } + } + } + /** * @param rpcContext the rpc context * @param inodePath the path to be created @@ -1873,7 +2041,7 @@ public FileInfo createFile(AlluxioURI path, CreateFileContext context) * @return the list of created inodes */ List createFileInternal(RpcContext rpcContext, LockedInodePath inodePath, - CreateFileContext context) + CreateFileContext context, boolean updateUfsAbsentCache) throws InvalidPathException, FileAlreadyExistsException, BlockInfoException, IOException, FileDoesNotExistException { if (mWhitelist.inList(inodePath.getUri().toString())) { @@ -1885,7 +2053,9 @@ List createFileInternal(RpcContext rpcContext, LockedInodePath inodePath, if (context.isPersisted()) { // The path exists in UFS, so it is no longer absent. The ancestors exist in UFS, but the // actual file does not exist in UFS yet. - mUfsAbsentPathCache.processExisting(inodePath.getUri().getParent()); + if (updateUfsAbsentCache) { + mUfsAbsentPathCache.processExisting(inodePath.getUri().getParent()); + } } else { MountTable.Resolution resolution = mMountTable.resolve(inodePath.getUri()); Metrics.getUfsOpsSavedCounter(resolution.getUfsMountPointUri(), @@ -2011,7 +2181,7 @@ public void delete(AlluxioURI path, DeleteContext context) InvalidPathException, AccessControlException { if (isOperationComplete(context)) { Metrics.COMPLETED_OPERATION_RETRIED_COUNT.inc(); - LOG.warn("A completed \"delete\" operation has been retried. {}", context); + LOG.warn("A completed \"delete\" operation has been retried. 
OperationContext={}", context); return; } Metrics.DELETE_PATHS_OPS.inc(); @@ -2079,6 +2249,12 @@ public void delete(AlluxioURI path, DeleteContext context) } deleteInternal(rpcContext, inodePath, context, false); + if (context.getOptions().getAlluxioOnly() + && context.getOptions().hasSyncParentNextTime()) { + boolean syncParentNextTime = context.getOptions().getSyncParentNextTime(); + mInodeTree.setDirectChildrenLoaded( + rpcContext, inodePath.getParentInodeDirectory(), !syncParentNextTime); + } auditContext.setSucceeded(true); cacheOperation(context); } @@ -2102,9 +2278,11 @@ public void delete(AlluxioURI path, DeleteContext context) * @param inodePath the file {@link LockedInodePath} * @param deleteContext the method optitions * @param bypassPermCheck whether the permission check has been done before entering this call + * @return the number of inodes deleted, and the number of inodes skipped that were unable + * to be deleted */ @VisibleForTesting - public void deleteInternal(RpcContext rpcContext, LockedInodePath inodePath, + public Pair deleteInternal(RpcContext rpcContext, LockedInodePath inodePath, DeleteContext deleteContext, boolean bypassPermCheck) throws FileDoesNotExistException, IOException, DirectoryNotEmptyException, InvalidPathException { Preconditions.checkState(inodePath.getLockPattern() == LockPattern.WRITE_EDGE); @@ -2112,14 +2290,21 @@ public void deleteInternal(RpcContext rpcContext, LockedInodePath inodePath, // TODO(jiri): A crash after any UFS object is deleted and before the delete operation is // journaled will result in an inconsistency between Alluxio and UFS. if (!inodePath.fullPathExists()) { - return; + return new Pair<>(0, 0); } long opTimeMs = mClock.millis(); Inode inode = inodePath.getInode(); if (inode == null) { - return; + return new Pair<>(0, 0); } + if (deleteContext.isSkipNotPersisted() && inode.isFile()) { + InodeFile inodeFile = inode.asFile(); + // skip deleting a non persisted file + if (!inodeFile.isPersisted() || !inodeFile.isCompleted()) { + return new Pair<>(0, 1); + } + } boolean recursive = deleteContext.getOptions().getRecursive(); if (inode.isDirectory() && !recursive && mInodeStore.hasChildren(inode.asDirectory())) { // inode is nonempty, and we don't want to delete a nonempty directory unless recursive is @@ -2150,12 +2335,22 @@ public void deleteInternal(RpcContext rpcContext, LockedInodePath inodePath, Set unsafeParentInodes = new HashSet<>(); // Alluxio URIs (and the reason for failure) which could not be deleted List> failedUris = new ArrayList<>(); + int inodeToDeleteUnsafeCount = 0; try (LockedInodePathList descendants = mInodeTree.getDescendants(inodePath)) { // This walks the tree in a DFS flavor, first all the children in a subtree, // then the sibling trees one by one. // Therefore, we first see a parent, then all its children. 
for (LockedInodePath childPath : descendants) { + // Check if we should skip non-persisted files + if (deleteContext.isSkipNotPersisted() && childPath.getInode().isFile()) { + InodeFile childInode = childPath.getInode().asFile(); + if (!childInode.isCompleted() || !childInode.isPersisted()) { + unsafeInodes.add(childInode.getId()); + unsafeParentInodes.add(childInode.getParentId()); + continue; + } + } if (bypassPermCheck) { inodesToDelete.add(new Pair<>(mInodeTree.getPath(childPath.getInode()), childPath)); } else { @@ -2240,6 +2435,7 @@ public void deleteInternal(RpcContext rpcContext, LockedInodePath inodePath, // Something went wrong with this path so it cannot be removed normally // Remove the path from further processing inodesToDelete.set(i, null); + inodeToDeleteUnsafeCount++; } } @@ -2270,11 +2466,17 @@ public void deleteInternal(RpcContext rpcContext, LockedInodePath inodePath, } } - if (!failedUris.isEmpty()) { + if (!failedUris.isEmpty() && !deleteContext.isSkipNotPersisted()) { throw new FailedPreconditionException(buildDeleteFailureMessage(failedUris)); } } Metrics.PATHS_DELETED.inc(inodesToDelete.size()); + int inodeSkipped = unsafeInodes.size(); + if (!unsafeInodes.isEmpty()) { + // remove 1 because we added the parent of the path being deleted + inodeSkipped--; + } + return new Pair<>(inodesToDelete.size() - inodeToDeleteUnsafeCount, inodeSkipped); } private String buildDeleteFailureMessage(List> failedUris) { @@ -2313,7 +2515,7 @@ public List getFileBlockInfoList(AlluxioURI path) auditContext.setAllowed(false); throw e; } - List ret = getFileBlockInfoListInternal(inodePath); + List ret = getFileBlockInfoListInternal(inodePath, false); Metrics.FILE_BLOCK_INFOS_GOT.inc(); auditContext.setSucceeded(true); return ret; @@ -2322,16 +2524,18 @@ public List getFileBlockInfoList(AlluxioURI path) /** * @param inodePath the {@link LockedInodePath} to get the info for + * @param excludeMountInfo exclude the mount info * @return a list of {@link FileBlockInfo} for all the blocks of the given inode */ - private List getFileBlockInfoListInternal(LockedInodePath inodePath) + private List getFileBlockInfoListInternal(LockedInodePath inodePath, + boolean excludeMountInfo) throws InvalidPathException, FileDoesNotExistException, UnavailableException { InodeFile file = inodePath.getInodeFile(); List blockInfoList = mBlockMaster.getBlockInfoList(file.getBlockIds()); List ret = new ArrayList<>(blockInfoList.size()); for (BlockInfo blockInfo : blockInfoList) { - ret.add(generateFileBlockInfo(inodePath, blockInfo)); + ret.add(generateFileBlockInfo(inodePath, blockInfo, excludeMountInfo)); } return ret; } @@ -2342,9 +2546,11 @@ private List getFileBlockInfoListInternal(LockedInodePath inodePa * * @param inodePath the file the block is a part of * @param blockInfo the {@link BlockInfo} to generate the {@link FileBlockInfo} from + * @param excludeMountInfo exclude the mount info * @return a new {@link FileBlockInfo} for the block */ - private FileBlockInfo generateFileBlockInfo(LockedInodePath inodePath, BlockInfo blockInfo) + private FileBlockInfo generateFileBlockInfo(LockedInodePath inodePath, BlockInfo blockInfo, + boolean excludeMountInfo) throws FileDoesNotExistException { InodeFile file = inodePath.getInodeFile(); FileBlockInfo fileBlockInfo = new FileBlockInfo(); @@ -2355,7 +2561,8 @@ private FileBlockInfo generateFileBlockInfo(LockedInodePath inodePath, BlockInfo long offset = file.getBlockSizeBytes() * BlockId.getSequenceNumber(blockInfo.getBlockId()); fileBlockInfo.setOffset(offset); - if 
(fileBlockInfo.getBlockInfo().getLocations().isEmpty() && file.isPersisted()) { + if (!excludeMountInfo && fileBlockInfo.getBlockInfo().getLocations().isEmpty() + && file.isPersisted()) { // No alluxio locations, but there is a checkpoint in the under storage system. Add the // locations from the under storage system. long blockId = fileBlockInfo.getBlockInfo().getBlockId(); @@ -2591,7 +2798,8 @@ public long createDirectory(AlluxioURI path, CreateDirectoryContext context) FileDoesNotExistException { if (isOperationComplete(context)) { Metrics.COMPLETED_OPERATION_RETRIED_COUNT.inc(); - LOG.warn("A completed \"createDirectory\" operation has been retried. {}", context); + LOG.warn("A completed \"createDirectory\" operation has been retried. OperationContext={}", + context); return getFileInfo(path, GetStatusContext.create(GetStatusPOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(-1)) @@ -2652,7 +2860,7 @@ public long createDirectory(AlluxioURI path, CreateDirectoryContext context) * @param context method context * @return a list of created inodes */ - List createDirectoryInternal(RpcContext rpcContext, LockedInodePath inodePath, + public List createDirectoryInternal(RpcContext rpcContext, LockedInodePath inodePath, UfsManager.UfsClient ufsClient, AlluxioURI ufsUri, CreateDirectoryContext context) throws InvalidPathException, FileAlreadyExistsException, IOException, FileDoesNotExistException { Preconditions.checkState(inodePath.getLockPattern() == LockPattern.WRITE_EDGE); @@ -2718,7 +2926,7 @@ public void rename(AlluxioURI srcPath, AlluxioURI dstPath, RenameContext context IOException, AccessControlException { if (isOperationComplete(context)) { Metrics.COMPLETED_OPERATION_RETRIED_COUNT.inc(); - LOG.warn("A completed \"rename\" operation has been retried. {}", context); + LOG.warn("A completed \"rename\" operation has been retried. 
OperationContext={}", context); return; } Metrics.RENAME_PATH_OPS.inc(); @@ -3065,7 +3273,7 @@ private boolean checkForOverwriteSyntax(RpcContext rpcContext, try { deleteInternal(rpcContext, dstInodePath, DeleteContext .mergeFrom(DeletePOptions.newBuilder() - .setRecursive(true).setAlluxioOnly(context.getPersist())), true); + .setRecursive(true).setAlluxioOnly(!context.getPersist())), true); dstInodePath.removeLastInode(); } catch (DirectoryNotEmptyException ex) { // IGNORE, this will never happen @@ -3254,7 +3462,7 @@ public Set getLostFiles() { * @param path the path to load metadata for * @param context the {@link LoadMetadataContext} */ - private void loadMetadataIfNotExist(RpcContext rpcContext, AlluxioURI path, + protected void loadMetadataIfNotExist(RpcContext rpcContext, AlluxioURI path, LoadMetadataContext context) throws InvalidPathException, AccessControlException { DescendantType syncDescendantType = @@ -3400,6 +3608,11 @@ public void mount(AlluxioURI alluxioPath, AlluxioURI ufsPath, MountContext conte } mMountTable.checkUnderWritableMountPoint(alluxioPath); + if (context.getOptions().getRemount()) { + LOG.info("Mount {} with remount options, so it will be unmounted first.", + inodePath.getUri()); + unmountInternal(rpcContext, inodePath); + } mountInternal(rpcContext, inodePath, ufsPath, context); auditContext.setSucceeded(true); Metrics.PATHS_MOUNTED.inc(); @@ -3841,18 +4054,31 @@ public void scheduleAsyncPersistence(AlluxioURI path, ScheduleAsyncPersistenceCo mInodeTree .lockFullInodePath(path, LockPattern.WRITE_INODE, rpcContext.getJournalContext()) ) { + InodeFile inode = inodePath.getInodeFile(); + if (!inode.isCompleted()) { + throw new InvalidPathException( + "Cannot persist an incomplete Alluxio file: " + inodePath.getUri()); + } scheduleAsyncPersistenceInternal(inodePath, context, rpcContext); } } + /** + * Persists an inode asynchronously. + * This method does not do the completion check. When this method is invoked, + * please make sure the inode has been completed. + * Currently, two places call this method. One is completeFile(), where we know that + * the file is completed. Another place is scheduleAsyncPersistence(), where we check + * if the inode is completed and throws an exception if it is not. 
+ * @param inodePath the locked inode path + * @param context the context + * @param rpcContext the rpc context + * @throws FileDoesNotExistException if the file does not exist + */ private void scheduleAsyncPersistenceInternal(LockedInodePath inodePath, ScheduleAsyncPersistenceContext context, RpcContext rpcContext) - throws InvalidPathException, FileDoesNotExistException { + throws FileDoesNotExistException { InodeFile inode = inodePath.getInodeFile(); - if (!inode.isCompleted()) { - throw new InvalidPathException( - "Cannot persist an incomplete Alluxio file: " + inodePath.getUri()); - } if (shouldPersistPath(inodePath.toString())) { mInodeTree.updateInode(rpcContext, UpdateInodeEntry.newBuilder().setId(inode.getId()) .setPersistenceState(PersistenceState.TO_BE_PERSISTED.name()).build()); @@ -3987,15 +4213,67 @@ InodeSyncStream.SyncStatus syncMetadata(RpcContext rpcContext, AlluxioURI path, } @Override - public void update() { - if (mReplicationCheckHeartbeatThread != null) { - long newValue = Configuration.getMs( - PropertyKey.MASTER_REPLICATION_CHECK_INTERVAL_MS); - mReplicationCheckHeartbeatThread.updateIntervalMs( - newValue); - LOG.info("The interval of {} updated to {}", - HeartbeatContext.MASTER_REPLICATION_CHECK, newValue); + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataContext context) + throws InvalidPathException { + TaskGroup task = mDefaultSyncProcess.syncPath(path, + GrpcUtils.fromProto(context.getOptions().getLoadDescendantType()), + GrpcUtils.fromProto(context.getOptions().getDirectoryLoadType()), 0, null, true); + try { + task.waitAllComplete(0); + } catch (Throwable t) { + LOG.error("Sync metadata failed for task group {}", task.getGroupId(), t); + } + return SyncMetadataPResponse.newBuilder().addAllTask( + task.toProtoTasks().collect(Collectors.toList())).build(); + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataContext context) + throws InvalidPathException, IOException { + TaskGroup result = mDefaultSyncProcess.syncPath(path, + GrpcUtils.fromProto(context.getOptions().getLoadDescendantType()), + GrpcUtils.fromProto(context.getOptions().getDirectoryLoadType()), 0, null, true); + return SyncMetadataAsyncPResponse.newBuilder() + .setSubmitted(true) + .setTaskGroupId(result.getGroupId()) + .addAllTaskIds(result.getTasks().map(it -> it.getTaskInfo().getId()) + .collect(Collectors.toSet())) + .build(); + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) { + Optional task = mDefaultSyncProcess.getTaskGroup(taskGroupId); + if (!task.isPresent()) { + throw new NotFoundRuntimeException("Task group id " + taskGroupId + " not found"); + } + GetSyncProgressPResponse.Builder responseBuilder = GetSyncProgressPResponse.newBuilder(); + responseBuilder.addAllTask(task.get().toProtoTasks().collect(Collectors.toList())); + + return responseBuilder.build(); + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) throws NotFoundException { + Optional group = mDefaultSyncProcess.getTaskGroup(taskGroupId); + if (!group.isPresent()) { + throw new NotFoundRuntimeException("Task group id " + taskGroupId + " not found"); + } + Optional ex = group.get().getTasks().map(baseTask -> { + try { + mDefaultSyncProcess.getTaskTracker().cancelTaskById(baseTask.getTaskInfo().getId()); + return null; + } catch (NotFoundException e) { + return e; + } + }).filter(Objects::nonNull).reduce((acc, e) -> { + acc.addSuppressed(e); + return acc; + }); + if 
(ex.isPresent()) { + throw ex.get(); } + return CancelSyncMetadataPResponse.newBuilder().build(); } @FunctionalInterface @@ -4061,16 +4339,21 @@ public FileSystemCommand workerHeartbeat(long workerId, List persistedFile } /** + * @param rpcContext the rpc context * @param inodePath the {@link LockedInodePath} to use * @param updateUfs whether to update the UFS with the attribute change * @param opTimeMs the operation time (in milliseconds) * @param context the method context */ - protected void setAttributeSingleFile(RpcContext rpcContext, LockedInodePath inodePath, + public void setAttributeSingleFile(RpcContext rpcContext, LockedInodePath inodePath, boolean updateUfs, long opTimeMs, SetAttributeContext context) throws FileDoesNotExistException, InvalidPathException, AccessControlException { Inode inode = inodePath.getInode(); SetAttributePOptions.Builder protoOptions = context.getOptions(); + if (inode.isDirectory() && protoOptions.hasDirectChildrenLoaded()) { + mInodeTree.setDirectChildrenLoaded( + rpcContext, inode.asDirectory(), protoOptions.getDirectChildrenLoaded()); + } if (protoOptions.hasPinned()) { mInodeTree.setPinned(rpcContext, inodePath, context.getOptions().getPinned(), context.getOptions().getPinnedMediaList(), opTimeMs); @@ -4346,10 +4629,10 @@ public void close() {} // Nothing to clean up * * @param fileId the file ID */ - private void handleExpired(long fileId) throws AlluxioException, UnavailableException { - try (JournalContext journalContext = createJournalContext(); - LockedInodePath inodePath = mInodeTree - .lockFullInodePath(fileId, LockPattern.WRITE_INODE, journalContext)) { + private void handleExpired(long fileId, JournalContext journalContext, + AtomicInteger journalCount) throws AlluxioException { + try (LockedInodePath inodePath = mInodeTree + .lockFullInodePath(fileId, LockPattern.WRITE_INODE, journalContext)) { InodeFile inode = inodePath.getInodeFile(); switch (inode.getPersistenceState()) { case LOST: @@ -4370,6 +4653,7 @@ private void handleExpired(long fileId) throws AlluxioException, UnavailableExce .setPersistJobId(Constants.PERSISTENCE_INVALID_JOB_ID) .setTempUfsPath(Constants.PERSISTENCE_INVALID_UFS_PATH) .build()); + journalCount.addAndGet(2); break; default: throw new IllegalStateException( @@ -4383,7 +4667,8 @@ private void handleExpired(long fileId) throws AlluxioException, UnavailableExce * * @param fileId the file ID */ - private void handleReady(long fileId) throws AlluxioException, IOException { + private void handleReady(long fileId, JournalContext journalContext, AtomicInteger journalCount) + throws AlluxioException, IOException { alluxio.time.ExponentialTimer timer = mPersistRequests.get(fileId); // Lookup relevant file information. AlluxioURI uri; @@ -4449,15 +4734,15 @@ private void handleReady(long fileId) throws AlluxioException, IOException { mPersistJobs.put(fileId, new PersistJob(jobId, fileId, uri, tempUfsPath, timer)); // Update the inode and journal the change. 
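Earlier in this hunk, `setAttributeSingleFile` gains a directory-only switch: clearing `directChildrenLoaded` marks a directory as needing a fresh child listing, complementing the `syncParentNextTime` handling in the delete path. A hypothetical sketch of driving it; `setDirectChildrenLoaded` on the options builder is an assumption inferred from the `hasDirectChildrenLoaded()`/`getDirectChildrenLoaded()` getters in the patch:

```java
// Hypothetical sketch: force /data to reload its children on the next listing.
// setDirectChildrenLoaded is assumed from the getters used in the patch.
SetAttributePOptions.Builder options = SetAttributePOptions.newBuilder()
    .setDirectChildrenLoaded(false); // mark children as not loaded => re-sync later
master.setAttribute(new AlluxioURI("/data"), SetAttributeContext.create(options));
```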
- try (JournalContext journalContext = createJournalContext(); - LockedInodePath inodePath = mInodeTree - .lockFullInodePath(fileId, LockPattern.WRITE_INODE, journalContext)) { + try (LockedInodePath inodePath = mInodeTree + .lockFullInodePath(fileId, LockPattern.WRITE_INODE, journalContext)) { InodeFile inode = inodePath.getInodeFile(); mInodeTree.updateInodeFile(journalContext, UpdateInodeFileEntry.newBuilder() .setId(inode.getId()) .setPersistJobId(jobId) .setTempUfsPath(tempUfsPath) .build()); + journalCount.incrementAndGet(); } } @@ -4471,79 +4756,95 @@ private void handleReady(long fileId) throws AlluxioException, IOException { * @throws InterruptedException if the thread is interrupted */ @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { LOG.debug("Async Persist heartbeat start"); java.util.concurrent.TimeUnit.SECONDS.sleep(mQuietPeriodSeconds); - // Process persist requests. - for (long fileId : mPersistRequests.keySet()) { - // Throw if interrupted. - if (Thread.interrupted()) { - throw new InterruptedException("PersistenceScheduler interrupted."); - } - boolean remove = true; - alluxio.time.ExponentialTimer timer = mPersistRequests.get(fileId); - if (timer == null) { - // This could occur if a key is removed from mPersistRequests while we are iterating. - continue; - } - alluxio.time.ExponentialTimer.Result timerResult = timer.tick(); - if (timerResult == alluxio.time.ExponentialTimer.Result.NOT_READY) { - // operation is not ready to be scheduled - continue; - } - AlluxioURI uri = null; - try { - try (LockedInodePath inodePath = mInodeTree - .lockFullInodePath(fileId, LockPattern.READ, NoopJournalContext.INSTANCE)) { - uri = inodePath.getUri(); - } catch (FileDoesNotExistException e) { - LOG.debug("The file (id={}) to be persisted was not found. Likely this file has been " - + "removed by users", fileId, e); - continue; + AtomicInteger journalCounter = new AtomicInteger(0); + try (JournalContext journalContext = createJournalContext()) { + // Process persist requests. + for (long fileId : mPersistRequests.keySet()) { + if (journalCounter.get() > 100) { + // The only exception thrown from flush() will be UnavailableException + // See catch (UnavailableException e) + journalContext.flush(); + journalCounter.set(0); } - try { - checkUfsMode(uri, OperationType.WRITE); - } catch (Exception e) { - LOG.warn("Unable to schedule persist request for path {}: {}", uri, e.toString()); - // Retry when ufs mode permits operation - remove = false; + // Throw if interrupted. + if (Thread.interrupted()) { + throw new InterruptedException("PersistenceScheduler interrupted."); + } + boolean remove = true; + alluxio.time.ExponentialTimer timer = mPersistRequests.get(fileId); + if (timer == null) { + // This could occur if a key is removed from mPersistRequests while we are iterating. 
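The restructured heartbeat that begins above replaces one journal context per persist request with a single batched context: `handleExpired` and `handleReady` write into the shared context (adding two entries and one entry respectively) and bump a shared counter, and the loop flushes once roughly 100 entries have accumulated. Its control flow, reduced to a skeleton with all names taken from the patch (checked exceptions from the handlers are elided):

```java
// Skeleton of the batched-journal pattern in the heartbeat above; the real
// loop also consults the ExponentialTimer and dispatches handleExpired.
AtomicInteger journalCounter = new AtomicInteger(0);
try (JournalContext journalContext = createJournalContext()) {
  for (long fileId : mPersistRequests.keySet()) {
    if (journalCounter.get() > 100) {
      journalContext.flush();  // batch boundary; only UnavailableException escapes
      journalCounter.set(0);
    }
    handleReady(fileId, journalContext, journalCounter); // bumps the counter
  }
} catch (UnavailableException e) {
  // Journal unavailable: discard the pending entries; the next primary will
  // re-create persist jobs for every TO_BE_PERSISTED file.
  LOG.error("Journal is not running, cannot persist files");
}
```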
continue; } - switch (timerResult) { - case EXPIRED: - handleExpired(fileId); - break; - case READY: - handleReady(fileId); - break; - default: - throw new IllegalStateException("Unrecognized timer state: " + timerResult); + alluxio.time.ExponentialTimer.Result timerResult = timer.tick(); + if (timerResult == alluxio.time.ExponentialTimer.Result.NOT_READY) { + // operation is not ready to be scheduled + continue; } - } catch (FileDoesNotExistException | InvalidPathException e) { - LOG.warn("The file {} (id={}) to be persisted was not found : {}", uri, fileId, - e.toString()); - LOG.debug("Exception: ", e); - } catch (UnavailableException e) { - LOG.warn("Failed to persist file {}, will retry later: {}", uri, e.toString()); - remove = false; - } catch (ResourceExhaustedException e) { - LOG.warn("The job service is busy, will retry later: {}", e.toString()); - LOG.debug("Exception: ", e); - mQuietPeriodSeconds = (mQuietPeriodSeconds == 0) ? 1 : - Math.min(MAX_QUIET_PERIOD_SECONDS, mQuietPeriodSeconds * 2); - remove = false; - // End the method here until the next heartbeat. No more jobs should be scheduled during - // the current heartbeat if the job master is at full capacity. - return; - } catch (Exception e) { - LOG.warn("Unexpected exception encountered when scheduling the persist job for file {} " - + "(id={}) : {}", uri, fileId, e.toString()); - LOG.debug("Exception: ", e); - } finally { - if (remove) { - mPersistRequests.remove(fileId); + AlluxioURI uri = null; + try { + try (LockedInodePath inodePath = mInodeTree + .lockFullInodePath(fileId, LockPattern.READ, NoopJournalContext.INSTANCE)) { + uri = inodePath.getUri(); + } catch (FileDoesNotExistException e) { + LOG.debug("The file (id={}) to be persisted was not found. Likely this file has been " + + "removed by users", fileId, e); + continue; + } + try { + checkUfsMode(uri, OperationType.WRITE); + } catch (Exception e) { + LOG.warn("Unable to schedule persist request for path {}: {}", uri, e.toString()); + // Retry when ufs mode permits operation + remove = false; + continue; + } + switch (timerResult) { + case EXPIRED: + handleExpired(fileId, journalContext, journalCounter); + break; + case READY: + handleReady(fileId, journalContext, journalCounter); + break; + default: + throw new IllegalStateException("Unrecognized timer state: " + timerResult); + } + } catch (FileDoesNotExistException | InvalidPathException e) { + LOG.warn("The file {} (id={}) to be persisted was not found : {}", uri, fileId, + e.toString()); + LOG.debug("Exception: ", e); + } catch (ResourceExhaustedException e) { + LOG.warn("The job service is busy, will retry later: {}", e.toString()); + LOG.debug("Exception: ", e); + mQuietPeriodSeconds = (mQuietPeriodSeconds == 0) ? 1 : + Math.min(MAX_QUIET_PERIOD_SECONDS, mQuietPeriodSeconds * 2); + remove = false; + // End the method here until the next heartbeat. No more jobs should be scheduled during + // the current heartbeat if the job master is at full capacity. + return; + } catch (Exception e) { + LOG.warn("Unexpected exception encountered when scheduling the persist job for file {} " + + "(id={}) : {}", uri, fileId, e.toString()); + LOG.debug("Exception: ", e); + } finally { + if (remove) { + mPersistRequests.remove(fileId); + } } } + } catch (UnavailableException e) { + // Two ways to arrive here: + // 1. createJournalContext() fails, the batch processing has not started yet + // 2. 
flush() fails and the queue is dirty, the JournalContext will be closed and flushed, + // but the flush will not succeed + // The context is MasterJournalContext, so an UnavailableException indicates either + // the primary failed over, or journal is closed + // In either case, it is fine to close JournalContext and throw away the journal entries + // The next primary will process all TO_BE_PERSISTED files and create new persist jobs + LOG.error("Journal is not running, cannot persist files"); } } } @@ -4574,6 +4875,9 @@ private void handleSuccess(PersistJob job) { String tempUfsPath = job.getTempUfsPath(); List blockIds = new ArrayList<>(); UfsManager.UfsClient ufsClient = null; + // This journal flush is per job and cannot be batched easily, + // because each execution is in a separate thread and this thread doesn't wait for those + // to complete try (JournalContext journalContext = createJournalContext(); LockedInodePath inodePath = mInodeTree .lockFullInodePath(fileId, LockPattern.WRITE_INODE, journalContext)) { @@ -4755,7 +5059,7 @@ private void createParentPath(List inodes, String ufsPath, } @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { boolean queueEmpty = mPersistCheckerPool.getQueue().isEmpty(); // Check the progress of persist jobs. for (long fileId : mPersistJobs.keySet()) { @@ -4843,7 +5147,7 @@ public void heartbeat() throws InterruptedException { @NotThreadSafe private final class TimeSeriesRecorder implements alluxio.heartbeat.HeartbeatExecutor { @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { // TODO(calvin): Provide a better way to keep track of metrics collected as time series MetricRegistry registry = MetricsSystem.METRIC_REGISTRY; SortedMap gauges = registry.getGauges(); @@ -5128,6 +5432,12 @@ public static void registerGauges(final UfsManager ufsManager, final InodeTree i inodeTree::getPinnedSize); MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_FILES_TO_PERSIST.getName(), () -> inodeTree.getToBePersistedIds().size()); + MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_REPLICATION_LIMITED_FILES.getName(), + () -> inodeTree.getReplicationLimitedFileIds().size()); + MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_TTL_BUCKETS.getName(), + () -> inodeTree.getTtlBuckets().getNumBuckets()); + MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_TTL_INODES.getName(), + () -> inodeTree.getTtlBuckets().getNumInodes()); MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_TOTAL_PATHS.getName(), inodeTree::getInodeCount); MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_FILE_SIZE.getName(), @@ -5186,7 +5496,7 @@ private Metrics() {} // prevent instantiation * @param srcInode the source inode of this command * @return newly-created {@link FileSystemMasterAuditContext} instance */ - private FileSystemMasterAuditContext createAuditContext(String command, AlluxioURI srcPath, + protected FileSystemMasterAuditContext createAuditContext(String command, AlluxioURI srcPath, @Nullable AlluxioURI dstPath, @Nullable Inode srcInode) { // Audit log may be enabled during runtime AsyncUserAccessAuditLogWriter auditLogWriter = null; @@ -5216,7 +5526,8 @@ private FileSystemMasterAuditContext createAuditContext(String command, AlluxioU Configuration.getEnum(PropertyKey.SECURITY_AUTHENTICATION_TYPE, AuthType.class); auditContext.setUgi(ugi) .setAuthType(authType) - 
.setIp(ClientIpAddressInjector.getIpAddress()) + .setIp(ClientContextServerInjector.getIpAddress()) + .setClientVersion(ClientContextServerInjector.getClientVersion()) .setCommand(command).setSrcPath(srcPath).setDstPath(dstPath) .setSrcInode(srcInode).setAllowed(true) .setCreationTimeNs(System.nanoTime()); @@ -5224,7 +5535,7 @@ private FileSystemMasterAuditContext createAuditContext(String command, AlluxioU return auditContext; } - private BlockDeletionContext createBlockDeletionContext() { + protected BlockDeletionContext createBlockDeletionContext() { return new DefaultBlockDeletionContext(this::removeBlocks, blocks -> blocks.forEach(mUfsBlockLocationCache::invalidate)); } @@ -5261,7 +5572,17 @@ public RpcContext createRpcContext() throws UnavailableException { @VisibleForTesting public RpcContext createRpcContext(OperationContext operationContext) throws UnavailableException { - return new RpcContext(createBlockDeletionContext(), createJournalContext(), + return new RpcContext(createBlockDeletionContext(), createJournalContext(true), + operationContext.withTracker(mStateLockCallTracker)); + } + + /** + * @param operationContext the operation context + * @return an Rpc context that does not use a merge journal context + */ + public RpcContext createNonMergingJournalRpcContext(OperationContext operationContext) + throws UnavailableException { + return new RpcContext(createBlockDeletionContext(), createJournalContext(false), operationContext.withTracker(mStateLockCallTracker)); } @@ -5271,13 +5592,19 @@ private LockingScheme createLockingScheme(AlluxioURI path, FileSystemMasterCommo getSyncPathCache(), DescendantType.NONE); } - private LockingScheme createSyncLockingScheme(AlluxioURI path, + protected LockingScheme createSyncLockingScheme(AlluxioURI path, FileSystemMasterCommonPOptions options, DescendantType descendantType) throws InvalidPathException { return new LockingScheme(path, LockPattern.READ, options, getSyncPathCache(), descendantType); } + protected void updateAccessTime(RpcContext rpcContext, Inode inode, long opTimeMs) { + if (mAccessTimeUpdater != null) { + mAccessTimeUpdater.updateAccessTime(rpcContext.getJournalContext(), inode, opTimeMs); + } + } + boolean isAclEnabled() { return Configuration.getBoolean(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_ENABLED); } @@ -5303,7 +5630,7 @@ public String getRootInodeOwner() { } @Override - public List getStateLockSharedWaitersAndHolders() { + public Collection getStateLockSharedWaitersAndHolders() { return mMasterContext.getStateLockManager().getSharedWaitersAndHolders(); } @@ -5327,4 +5654,25 @@ public MountTable getMountTable() { public void needsSync(AlluxioURI path) throws InvalidPathException { getSyncPathCache().notifyInvalidation(path); } + + @VisibleForTesting + protected DefaultSyncProcess createSyncProcess( + ReadOnlyInodeStore inodeStore, MountTable mountTable, + InodeTree inodeTree, UfsSyncPathCache syncPathCache) { + return new DefaultSyncProcess( + this, inodeStore, mountTable, inodeTree, syncPathCache, mUfsAbsentPathCache); + } + + @VisibleForTesting + DefaultSyncProcess getMetadataSyncer() { + return mDefaultSyncProcess; + } + + /** + * Get scheduler. 
+ * @return the scheduler + */ + public Scheduler getScheduler() { + return mScheduler; + } } diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemJournalEntryMerger.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemJournalEntryMerger.java index 5c8df09dace5..c0910746730a 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemJournalEntryMerger.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemJournalEntryMerger.java @@ -70,6 +70,14 @@ else if ( MutableInodeDirectory.fromJournalEntry(existingEntry.getInodeDirectory()); if (entry.hasUpdateInode()) { inodeDirectory.updateFromEntry(entry.getUpdateInode()); + // An UpdateInode entry does not contain the directory fingerprint, + // so we still need to add the new inode journal entry to the list to keep the + // fingerprint updated, + // while we still merge it with the existing inode directory on a best-effort basis. + if (entry.getUpdateInode().hasUfsFingerprint() + && !entry.getUpdateInode().getUfsFingerprint().equals("")) { + mJournalEntries.add(entry); + } } else if (entry.hasUpdateInodeDirectory()) { inodeDirectory.updateFromEntry(entry.getUpdateInodeDirectory()); } diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemMaster.java index a270cea637e2..22659e4d3e44 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemMaster.java @@ -24,8 +24,13 @@ import alluxio.exception.InvalidPathException; import alluxio.exception.UnexpectedAlluxioException; import alluxio.exception.status.InvalidArgumentException; +import alluxio.exception.status.NotFoundException; import alluxio.exception.status.UnavailableException; +import alluxio.grpc.CancelSyncMetadataPResponse; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.SetAclAction; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.master.Master; import alluxio.master.file.contexts.CheckAccessContext; import alluxio.master.file.contexts.CheckConsistencyContext; @@ -42,6 +47,7 @@ import alluxio.master.file.contexts.ScheduleAsyncPersistenceContext; import alluxio.master.file.contexts.SetAclContext; import alluxio.master.file.contexts.SetAttributeContext; +import alluxio.master.file.contexts.SyncMetadataContext; import alluxio.master.file.contexts.WorkerHeartbeatContext; import alluxio.master.file.meta.FileSystemMasterView; import alluxio.master.file.meta.PersistenceState; @@ -627,7 +633,7 @@ void activeSyncMetadata(AlluxioURI path, Collection changedFiles, /** * @return the list of thread identifiers that are waiting and holding the state lock */ - List getStateLockSharedWaitersAndHolders(); + Collection getStateLockSharedWaitersAndHolders(); /** * Mark a path as needing synchronization with the UFS, when this path or any @@ -635,4 +641,37 @@ void activeSyncMetadata(AlluxioURI path, Collection changedFiles, * @param path the path to invalidate */ void needsSync(AlluxioURI path) throws InvalidPathException; + + /** + * Syncs the metadata of a given path. + * + * @param path the path to sync + * @param context the method context + * @return the sync metadata response + */ + SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataContext context) + throws InvalidPathException, IOException; + + /** + * Submits a metadata sync task and runs it asynchronously.
+ * @param path the path to sync + * @param context the method context + * @return the sync metadata async response + */ + SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataContext context) + throws InvalidPathException, IOException; + + /** + * Gets the progress of a metadata sync task. + * @param taskGroupId the task group id + * @return the sync progress + */ + GetSyncProgressPResponse getSyncProgress(long taskGroupId); + + /** + * Cancels an ongoing metadata sync. + * @param taskGroupId the task group id + * @return the cancel sync metadata response + */ + CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) throws NotFoundException; } diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterAuditContext.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterAuditContext.java index 45e2f0e0f24f..ce440da857dc 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterAuditContext.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterAuditContext.java @@ -12,6 +12,8 @@ package alluxio.master.file; import alluxio.AlluxioURI; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; import alluxio.master.audit.AsyncUserAccessAuditLogWriter; import alluxio.master.audit.AuditContext; import alluxio.master.file.meta.Inode; @@ -37,6 +39,7 @@ public final class FileSystemMasterAuditContext implements AuditContext { private Inode mSrcInode; private long mCreationTimeNs; private long mExecutionTimeNs; + private String mClientVersion; @Override public FileSystemMasterAuditContext setAllowed(boolean allowed) { @@ -139,6 +142,17 @@ public FileSystemMasterAuditContext setCreationTimeNs(long creationTimeNs) { return this; } + /** + * Sets the client version. + * + * @param version client version + * @return this {@link AuditContext} instance + */ + public FileSystemMasterAuditContext setClientVersion(String version) { + mClientVersion = version; + return this; + } + /** * Constructor of {@link FileSystemMasterAuditContext}.
* @@ -160,21 +174,28 @@ public void close() { @Override public String toString() { + StringBuilder auditLog = new StringBuilder(); if (mSrcInode != null) { short mode = mSrcInode.getMode(); - return String.format( + auditLog.append(String.format( "succeeded=%b\tallowed=%b\tugi=%s (AUTH=%s)\tip=%s\tcmd=%s\tsrc=%s\tdst=%s\t" + "perm=%s:%s:%s%s%s\texecutionTimeUs=%d", mSucceeded, mAllowed, mUgi, mAuthType, mIp, mCommand, mSrcPath, mDstPath, mSrcInode.getOwner(), mSrcInode.getGroup(), Mode.extractOwnerBits(mode), Mode.extractGroupBits(mode), Mode.extractOtherBits(mode), - mExecutionTimeNs / 1000); + mExecutionTimeNs / 1000)); } else { - return String.format( + auditLog.append(String.format( "succeeded=%b\tallowed=%b\tugi=%s (AUTH=%s)\tip=%s\tcmd=%s\tsrc=%s\tdst=%s\t" + "perm=null\texecutionTimeUs=%d", mSucceeded, mAllowed, mUgi, mAuthType, mIp, mCommand, mSrcPath, mDstPath, - mExecutionTimeNs / 1000); + mExecutionTimeNs / 1000)); + } + if (Configuration.global().getBoolean(PropertyKey.USER_CLIENT_REPORT_VERSION_ENABLED)) { + auditLog.append( + String.format("\tclientVersion=%s\t", mClientVersion)); } + auditLog.append("\tproto=rpc"); + return auditLog.toString(); } } diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java index 493c27869bac..3ac1c04612d7 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java @@ -16,6 +16,10 @@ import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.AlluxioException; +import alluxio.exception.ExceptionMessage; +import alluxio.exception.FileDoesNotExistException; +import alluxio.grpc.CancelSyncMetadataPRequest; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPRequest; import alluxio.grpc.CheckAccessPResponse; import alluxio.grpc.CheckConsistencyPOptions; @@ -30,6 +34,7 @@ import alluxio.grpc.CreateFilePResponse; import alluxio.grpc.DeletePRequest; import alluxio.grpc.DeletePResponse; +import alluxio.grpc.ExistsPOptions; import alluxio.grpc.ExistsPRequest; import alluxio.grpc.ExistsPResponse; import alluxio.grpc.FileSystemMasterClientServiceGrpc; @@ -37,6 +42,8 @@ import alluxio.grpc.FreePResponse; import alluxio.grpc.GetFilePathPRequest; import alluxio.grpc.GetFilePathPResponse; +import alluxio.grpc.GetJobProgressPRequest; +import alluxio.grpc.GetJobProgressPResponse; import alluxio.grpc.GetMountTablePRequest; import alluxio.grpc.GetMountTablePResponse; import alluxio.grpc.GetNewBlockIdForFilePRequest; @@ -48,7 +55,10 @@ import alluxio.grpc.GetStatusPResponse; import alluxio.grpc.GetSyncPathListPRequest; import alluxio.grpc.GetSyncPathListPResponse; +import alluxio.grpc.GetSyncProgressPRequest; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.GrpcUtils; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPRequest; import alluxio.grpc.ListStatusPResponse; import alluxio.grpc.ListStatusPartialPRequest; @@ -69,14 +79,24 @@ import alluxio.grpc.SetAttributePResponse; import alluxio.grpc.StartSyncPRequest; import alluxio.grpc.StartSyncPResponse; +import alluxio.grpc.StopJobPRequest; +import alluxio.grpc.StopJobPResponse; import alluxio.grpc.StopSyncPRequest; import alluxio.grpc.StopSyncPResponse; +import alluxio.grpc.SubmitJobPRequest; +import 
alluxio.grpc.SubmitJobPResponse; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPRequest; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPRequest; import alluxio.grpc.UnmountPResponse; import alluxio.grpc.UpdateMountPRequest; import alluxio.grpc.UpdateMountPResponse; import alluxio.grpc.UpdateUfsModePRequest; import alluxio.grpc.UpdateUfsModePResponse; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; +import alluxio.job.util.SerializationUtils; import alluxio.master.file.contexts.CheckAccessContext; import alluxio.master.file.contexts.CheckConsistencyContext; import alluxio.master.file.contexts.CompleteFileContext; @@ -93,8 +113,13 @@ import alluxio.master.file.contexts.ScheduleAsyncPersistenceContext; import alluxio.master.file.contexts.SetAclContext; import alluxio.master.file.contexts.SetAttributeContext; +import alluxio.master.file.contexts.SyncMetadataContext; +import alluxio.master.job.JobFactoryProducer; +import alluxio.master.scheduler.Scheduler; import alluxio.recorder.Recorder; +import alluxio.scheduler.job.Job; import alluxio.underfs.UfsMode; +import alluxio.util.io.PathUtils; import alluxio.wire.MountPointInfo; import alluxio.wire.SyncPointInfo; @@ -103,7 +128,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -117,15 +144,19 @@ public final class FileSystemMasterClientServiceHandler private static final Logger LOG = LoggerFactory.getLogger(FileSystemMasterClientServiceHandler.class); private final FileSystemMaster mFileSystemMaster; + private final Scheduler mScheduler; /** * Creates a new instance of {@link FileSystemMasterClientServiceHandler}. 
* * @param fileSystemMaster the {@link FileSystemMaster} the handler uses internally + * @param scheduler the {@link Scheduler} */ - public FileSystemMasterClientServiceHandler(FileSystemMaster fileSystemMaster) { + public FileSystemMasterClientServiceHandler(FileSystemMaster fileSystemMaster, + Scheduler scheduler) { Preconditions.checkNotNull(fileSystemMaster, "fileSystemMaster"); mFileSystemMaster = fileSystemMaster; + mScheduler = Preconditions.checkNotNull(scheduler, "scheduler"); } @Override @@ -178,11 +209,26 @@ public void completeFile(CompleteFilePRequest request, }, "CompleteFile", "request=%s", responseObserver, request); } + private void checkBucketPathExists(String path) + throws AlluxioException, IOException { + + String bucketPath = PathUtils.getFirstLevelDirectory(path); + boolean exists = mFileSystemMaster.exists(getAlluxioURI(bucketPath), + ExistsContext.create(ExistsPOptions.getDefaultInstance().toBuilder())); + if (!exists) { + throw new FileDoesNotExistException( + ExceptionMessage.BUCKET_DOES_NOT_EXIST.getMessage(bucketPath)); + } + } + @Override public void createDirectory(CreateDirectoryPRequest request, StreamObserver responseObserver) { CreateDirectoryPOptions options = request.getOptions(); RpcUtils.call(LOG, () -> { + if (request.getOptions().getCheckS3BucketPath()) { + checkBucketPathExists(request.getPath()); + } AlluxioURI pathUri = getAlluxioURI(request.getPath()); mFileSystemMaster.createDirectory(pathUri, CreateDirectoryContext.create(options.toBuilder()) .withTracker(new GrpcCallTracker(responseObserver))); @@ -194,6 +240,9 @@ public void createDirectory(CreateDirectoryPRequest request, public void createFile(CreateFilePRequest request, StreamObserver responseObserver) { RpcUtils.call(LOG, () -> { + if (request.getOptions().getCheckS3BucketPath()) { + checkBucketPathExists(request.getPath()); + } AlluxioURI pathUri = getAlluxioURI(request.getPath()); return CreateFilePResponse.newBuilder() .setFileInfo(GrpcUtils.toProto(mFileSystemMaster.createFile(pathUri, @@ -467,9 +516,9 @@ public void setAcl(SetAclPRequest request, StreamObserver respo @Override public void getStateLockHolders(GetStateLockHoldersPRequest request, - StreamObserver responseObserver) { + StreamObserver responseObserver) { RpcUtils.call(LOG, () -> { - final List holders = mFileSystemMaster.getStateLockSharedWaitersAndHolders(); + final Collection holders = mFileSystemMaster.getStateLockSharedWaitersAndHolders(); return GetStateLockHoldersPResponse.newBuilder().addAllThreads(holders).build(); }, "getStateLockHolders", "request=%s", responseObserver, request); } @@ -483,6 +532,59 @@ public void needsSync(NeedsSyncRequest request, }, "NeedsSync", true, "request=%s", responseObserver, request); } + @Override + public void submitJob(SubmitJobPRequest request, + StreamObserver responseObserver) { + + RpcUtils.call(LOG, () -> { + JobRequest jobRequest; + try { + jobRequest = (JobRequest) SerializationUtils.deserialize(request + .getRequestBody() + .toByteArray()); + } catch (Exception e) { + throw new IllegalArgumentException("fail to parse job request", e); + } + Job job = JobFactoryProducer.create(jobRequest, mFileSystemMaster).create(); + boolean submitted = mScheduler.submitJob(job); + SubmitJobPResponse.Builder builder = SubmitJobPResponse.newBuilder(); + if (submitted) { + builder.setJobId(job.getJobId()); + } + return builder.build(); + }, "submitJob", "request=%s", responseObserver, request); + } + + @Override + public void stopJob(StopJobPRequest request, + StreamObserver 
responseObserver) { + RpcUtils.call(LOG, () -> { + boolean stopped = mScheduler.stopJob(JobDescription.from(request.getJobDescription())); + return alluxio.grpc.StopJobPResponse.newBuilder() + .setJobStopped(stopped) + .build(); + }, "stopJob", "request=%s", responseObserver, request); + } + + @Override + public void getJobProgress(GetJobProgressPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + JobProgressReportFormat format = JobProgressReportFormat.TEXT; + if (request.hasOptions() && request.getOptions().hasFormat()) { + format = request.getOptions().getFormat(); + } + boolean verbose = false; + if (request.hasOptions() && request.getOptions().hasVerbose()) { + verbose = request.getOptions().getVerbose(); + } + return GetJobProgressPResponse.newBuilder() + .setProgressReport(mScheduler.getJobProgress( + JobDescription.from(request.getJobDescription()), format, verbose)) + .build(); + }, "getJobProgress", "request=%s", responseObserver, request); + } + /** * Helper to return {@link AlluxioURI} from transport URI. * @@ -492,4 +594,46 @@ public void needsSync(NeedsSyncRequest request, private AlluxioURI getAlluxioURI(String uriStr) { return new AlluxioURI(uriStr); } + + @Override + public void syncMetadata( + SyncMetadataPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + return mFileSystemMaster.syncMetadata( + new AlluxioURI(request.getPath()), + SyncMetadataContext.create(request.getOptions().toBuilder())); + }, "syncMetadata", "request=%s", responseObserver, request); + } + + @Override + public void syncMetadataAsync( + SyncMetadataPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + return mFileSystemMaster.syncMetadataAsync( + new AlluxioURI(request.getPath()), + SyncMetadataContext.create(request.getOptions().toBuilder())); + }, "syncMetadataAsync", "request=%s", responseObserver, request); + } + + @Override + public void getSyncProgress( + GetSyncProgressPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + return mFileSystemMaster.getSyncProgress( + request.getTaskGroupId()); + }, "getSyncProgress", "request=%s", responseObserver, request); + } + + @Override + public void cancelSyncMetadata( + CancelSyncMetadataPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + return mFileSystemMaster.cancelSyncMetadata( + request.getTaskGroupId()); + }, "cancelSyncMetadata", "request=%s", responseObserver, request); + } } diff --git a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java index a193863309c2..a7422eb006b8 100644 --- a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java +++ b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java @@ -48,6 +48,7 @@ import alluxio.master.file.meta.LockingScheme; import alluxio.master.file.meta.MountTable; import alluxio.master.file.meta.MutableInodeFile; +import alluxio.master.file.meta.SyncCheck; import alluxio.master.file.meta.SyncCheck.SyncResult; import alluxio.master.file.meta.UfsAbsentPathCache; import alluxio.master.file.meta.UfsSyncPathCache; @@ -295,6 +296,10 @@ public enum SyncStatus { private final int mConcurrencyLevel = Configuration.getInt(PropertyKey.MASTER_METADATA_SYNC_CONCURRENCY_LEVEL); + private final boolean mGetDirectoryStatusSkipLoadingChildren = + Configuration.getBoolean(
PropertyKey.MASTER_METADATA_SYNC_GET_DIRECTORY_STATUS_SKIP_LOADING_CHILDREN); + private final FileSystemMasterAuditContext mAuditContext; private final Function mAuditContextSrcInodeFunc; @@ -400,6 +405,12 @@ public InodeSyncStream(LockingScheme rootScheme, DefaultFileSystemMaster fsMaste * @return SyncStatus object */ public SyncStatus sync() throws AccessControlException, InvalidPathException { + LOG.debug("Running InodeSyncStream on path {}, with status {}, and force sync {}", + mRootScheme.getPath(), mRootScheme.shouldSync(), mForceSync); + if (!mRootScheme.shouldSync().isShouldSync() && !mForceSync) { + DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SKIPPED.inc(); + return SyncStatus.NOT_NEEDED; + } if (!mDedupConcurrentSync) { return syncInternal(); } @@ -425,13 +436,7 @@ private SyncStatus syncInternal() throws int failedSyncPathCount = 0; int skippedSyncPathCount = 0; int stopNum = -1; // stop syncing when we've processed this many paths. -1 for infinite - LOG.debug("Running InodeSyncStream on path {}, with status {}, and force sync {}", - mRootScheme.getPath(), mRootScheme.shouldSync(), mForceSync); - if (!mRootScheme.shouldSync().isShouldSync() && !mForceSync) { - DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SKIPPED.inc(); - return SyncStatus.NOT_NEEDED; - } - if (mDedupConcurrentSync) { + if (mDedupConcurrentSync && mRootScheme.shouldSync() != SyncCheck.SHOULD_SYNC) { /* * If a concurrent sync on the same path is successful after this sync had already * been initialized and that sync is successful, then there is no need to sync again. @@ -452,9 +457,10 @@ private SyncStatus syncInternal() throws * Note that this still applies if A is to sync recursively path /aaa while B is to * sync path /aaa/bbb as the sync scope of A covers B's. */ - boolean shouldSync = mUfsSyncPathCache.shouldSyncPath(mRootScheme.getPath(), mSyncInterval, + boolean shouldSkipSync = + mUfsSyncPathCache.shouldSyncPath(mRootScheme.getPath(), mSyncInterval, mDescendantType).getLastSyncTime() > mRootScheme.shouldSync().getLastSyncTime(); - if (shouldSync) { + if (shouldSkipSync) { DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SKIPPED.inc(); LOG.debug("Skipped sync on {} due to successful concurrent sync", mRootScheme.getPath()); return SyncStatus.NOT_NEEDED; @@ -477,6 +483,10 @@ private SyncStatus syncInternal() throws // If descendantType is ONE, then we shouldn't process any more paths except for those // currently in the queue stopNum = mPendingPaths.size(); + } else if (mGetDirectoryStatusSkipLoadingChildren && mDescendantType == DescendantType.NONE) { + // If descendantType is NONE, do not process any path in the queue after + // the inode itself is loaded. + stopNum = 0; } // process the sync result for the original path @@ -830,7 +840,7 @@ private void syncExistingInodeMetadata( || !aclPair.getFirst().hasExtended()) { ufsFpParsed = Fingerprint.create(ufs.getUnderFSType(), cachedStatus); } else { - ufsFpParsed = Fingerprint.create(ufs.getUnderFSType(), cachedStatus, + ufsFpParsed = Fingerprint.create(ufs.getUnderFSType(), cachedStatus, null, aclPair.getFirst()); } } @@ -897,6 +907,8 @@ private void syncExistingInodeMetadata( if (mDescendantType == DescendantType.ONE) { syncChildren = syncChildren && mRootScheme.getPath().equals(inodePath.getUri()); + } else if (mDescendantType == DescendantType.NONE && mGetDirectoryStatusSkipLoadingChildren) { + syncChildren = false; } int childCount = inode.isDirectory() ? 
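// A minimal sketch (not part of this patch) of the dedup decision used by syncInternal
// above: a sync that was queued at time T is redundant if a concurrent sync of the same
// path completed successfully after T. The map is a hypothetical stand-in for the
// per-path timestamps kept by UfsSyncPathCache.
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public final class DedupSyncSketch {
  private final Map<String, Long> mLastSyncMs = new ConcurrentHashMap<>();

  void recordSuccess(String path, long nowMs) {
    mLastSyncMs.put(path, nowMs);
  }

  // Returns true when the queued sync can be skipped as NOT_NEEDED.
  boolean shouldSkip(String path, long queuedAtMs) {
    return mLastSyncMs.getOrDefault(path, -1L) > queuedAtMs;
  }

  public static void main(String[] args) {
    DedupSyncSketch cache = new DedupSyncSketch();
    long queuedAt = System.currentTimeMillis();
    cache.recordSuccess("/aaa", queuedAt + 5); // a concurrent sync finished after we queued
    System.out.println(cache.shouldSkip("/aaa", queuedAt)); // true -> skip this sync
  }
}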
(int) inode.asDirectory().getChildCount() : 0; @@ -1200,7 +1212,7 @@ void loadFileMetadataInternal(RpcContext rpcContext, LockedInodePath inodePath, .setTtl(context.getOptions().getCommonOptions().getTtl()) .setTtlAction(context.getOptions().getCommonOptions().getTtlAction())); createFileContext.setWriteType(WriteType.THROUGH); // set as through since already in UFS - createFileContext.setMetadataLoad(true); + createFileContext.setMetadataLoad(true, true); createFileContext.setOwner(context.getUfsStatus().getOwner()); createFileContext.setGroup(context.getUfsStatus().getGroup()); createFileContext.setXAttr(context.getUfsStatus().getXAttr()); @@ -1217,20 +1229,25 @@ void loadFileMetadataInternal(RpcContext rpcContext, LockedInodePath inodePath, if (ufsLastModified != null) { createFileContext.setOperationTimeMs(ufsLastModified); } - + // If the journal context is a MetadataSyncMergeJournalContext, then the + // journals will be taken care and merged by that context already and hence + // there's no need to create a new MergeJournalContext. + boolean shouldUseMetadataSyncMergeJournalContext = + mUseFileSystemMergeJournalContext + && rpcContext.getJournalContext() instanceof MetadataSyncMergeJournalContext; try (LockedInodePath writeLockedPath = inodePath.lockFinalEdgeWrite(); - JournalContext merger = mUseFileSystemMergeJournalContext + JournalContext merger = shouldUseMetadataSyncMergeJournalContext ? NoopJournalContext.INSTANCE : new MergeJournalContext(rpcContext.getJournalContext(), writeLockedPath.getUri(), InodeSyncStream::mergeCreateComplete) ) { // We do not want to close this wrapRpcContext because it uses elements from another context - RpcContext wrapRpcContext = mUseFileSystemMergeJournalContext + RpcContext wrapRpcContext = shouldUseMetadataSyncMergeJournalContext ? 
rpcContext : new RpcContext( rpcContext.getBlockDeletionContext(), merger, rpcContext.getOperationContext()); - fsMaster.createFileInternal(wrapRpcContext, writeLockedPath, createFileContext); + fsMaster.createFileInternal(wrapRpcContext, writeLockedPath, createFileContext, true); CompleteFileContext completeContext = CompleteFileContext.mergeFrom(CompleteFilePOptions.newBuilder().setUfsLength(ufsLength)) .setUfsStatus(context.getUfsStatus()).setMetadataLoad(true); @@ -1305,7 +1322,7 @@ private static void loadDirectoryMetadataInternal(RpcContext rpcContext, MountTa .setTtl(context.getOptions().getCommonOptions().getTtl()) .setTtlAction(context.getOptions().getCommonOptions().getTtlAction())); createDirectoryContext.setMountPoint(isMountPoint); - createDirectoryContext.setMetadataLoad(true); + createDirectoryContext.setMetadataLoad(true, true); createDirectoryContext.setWriteType(WriteType.THROUGH); AccessControlList acl = null; @@ -1374,16 +1391,16 @@ private void maybeFlushJournalToAsyncJournalWriter(RpcContext rpcContext) { protected RpcContext getMetadataSyncRpcContext() { JournalContext journalContext = mRpcContext.getJournalContext(); - if (!mUseFileSystemMergeJournalContext - || !(journalContext instanceof FileSystemMergeJournalContext)) { - return mRpcContext; + if (mUseFileSystemMergeJournalContext + && journalContext instanceof FileSystemMergeJournalContext) { + return new RpcContext( + mRpcContext.getBlockDeletionContext(), + new MetadataSyncMergeJournalContext( + ((FileSystemMergeJournalContext) journalContext).getUnderlyingJournalContext(), + new FileSystemJournalEntryMerger()), + mRpcContext.getOperationContext()); } - return new RpcContext( - mRpcContext.getBlockDeletionContext(), - new MetadataSyncMergeJournalContext( - ((FileSystemMergeJournalContext) journalContext).getUnderlyingJournalContext(), - new FileSystemJournalEntryMerger()), - mRpcContext.getOperationContext()); + return mRpcContext; } @Override diff --git a/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java b/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java index 76796c39b1fa..595322679c31 100644 --- a/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java +++ b/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java @@ -30,10 +30,13 @@ import alluxio.master.journal.JournalContext; import alluxio.master.journal.NoopJournalContext; import alluxio.proto.journal.File.UpdateInodeEntry; +import alluxio.util.ThreadUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.HashMap; +import java.util.Map; import java.util.Set; import javax.annotation.concurrent.NotThreadSafe; @@ -58,33 +61,47 @@ public InodeTtlChecker(FileSystemMaster fileSystemMaster, InodeTree inodeTree) { } @Override - public void heartbeat() throws InterruptedException { - Set expiredBuckets = mTtlBuckets.getExpiredBuckets(System.currentTimeMillis()); + public void heartbeat(long timeLimitMs) throws InterruptedException { + Set expiredBuckets = mTtlBuckets.pollExpiredBuckets(System.currentTimeMillis()); + Map failedInodesToRetryNum = new HashMap<>(); for (TtlBucket bucket : expiredBuckets) { - for (Inode inode : bucket.getInodes()) { + for (Map.Entry inodeExpiryEntry : bucket.getInodeExpiries()) { // Throw if interrupted. 
if (Thread.interrupted()) { throw new InterruptedException("InodeTtlChecker interrupted."); } + long inodeId = inodeExpiryEntry.getKey(); + int leftRetries = inodeExpiryEntry.getValue(); + // Exhausted retry attempt to expire this inode, bail. + if (leftRetries <= 0) { + continue; + } AlluxioURI path = null; try (LockedInodePath inodePath = mInodeTree.lockFullInodePath( - inode.getId(), LockPattern.READ, NoopJournalContext.INSTANCE) + inodeId, LockPattern.READ, NoopJournalContext.INSTANCE) ) { path = inodePath.getUri(); } catch (FileDoesNotExistException e) { // The inode has already been deleted, nothing needs to be done. continue; } catch (Exception e) { - LOG.error("Exception trying to clean up {} for ttl check: {}", inode.toString(), - e.toString()); + LOG.error("Exception trying to clean up inode:{},path:{} for ttl check: {}", inodeId, + path, e.toString()); } if (path != null) { + Inode inode = null; try { + inode = mTtlBuckets.loadInode(inodeId); + // Check again if this inode is indeed expired. + if (inode == null || inode.getTtl() == Constants.NO_TTL + || inode.getCreationTimeMs() + inode.getTtl() > System.currentTimeMillis()) { + continue; + } TtlAction ttlAction = inode.getTtlAction(); LOG.info("Path {} TTL has expired, performing action {}", path.getPath(), ttlAction); switch (ttlAction) { - case FREE: + case FREE: // Default: FREE // public free method will lock the path, and check WRITE permission required at // parent of file if (inode.isDirectory()) { @@ -102,9 +119,8 @@ public void heartbeat() throws InterruptedException { .setTtlAction(ProtobufUtils.toProtobuf(TtlAction.DELETE)) .build()); } - mTtlBuckets.remove(inode); break; - case DELETE:// Default if not set is DELETE + case DELETE: // public delete method will lock the path, and check WRITE permission required at // parent of file if (inode.isDirectory()) { @@ -114,16 +130,40 @@ public void heartbeat() throws InterruptedException { mFileSystemMaster.delete(path, DeleteContext.defaults()); } break; + case DELETE_ALLUXIO: + // public delete method will lock the path, and check WRITE permission required at + // parent of file + if (inode.isDirectory()) { + mFileSystemMaster.delete(path, + DeleteContext.mergeFrom(DeletePOptions.newBuilder() + .setRecursive(true).setAlluxioOnly(true))); + } else { + mFileSystemMaster.delete(path, + DeleteContext.mergeFrom(DeletePOptions.newBuilder() + .setAlluxioOnly(true))); + } + break; default: LOG.error("Unknown ttl action {}", ttlAction); } } catch (Exception e) { - LOG.error("Exception trying to clean up {} for ttl check", inode, e); + boolean retryExhausted = --leftRetries <= 0; + if (retryExhausted) { + LOG.error("Retry exhausted to clean up {} for ttl check. {}", + path, ThreadUtils.formatStackTrace(e)); + } else if (inode != null) { + failedInodesToRetryNum.put(inode, leftRetries); + } } } } } - mTtlBuckets.removeBuckets(expiredBuckets); + // Put back those failed-to-expire inodes for next round retry. 
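// A sketch (not part of this patch) of the retry accounting the TTL checker above adopts:
// each expired inode carries a remaining-retry budget, a failed free/delete decrements it
// and re-inserts the inode for the next heartbeat, and a budget of zero drops the inode
// for good. Plain maps stand in for TtlBucket and the inode tree.
import java.util.HashMap;
import java.util.Map;

public final class TtlRetrySketch {
  public static void main(String[] args) {
    Map<Long, Integer> expired = new HashMap<>();   // inodeId -> retries left
    expired.put(100L, 2);
    Map<Long, Integer> retryNextRound = new HashMap<>();
    for (Map.Entry<Long, Integer> e : expired.entrySet()) {
      int leftRetries = e.getValue();
      if (leftRetries <= 0) {
        continue; // budget exhausted: give up on this inode
      }
      try {
        throw new RuntimeException("simulated free/delete failure");
      } catch (RuntimeException ex) {
        if (--leftRetries > 0) {
          retryNextRound.put(e.getKey(), leftRetries); // retried on the next heartbeat
        }
      }
    }
    System.out.println("to retry next round: " + retryNextRound); // {100=1}
  }
}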
+ if (!failedInodesToRetryNum.isEmpty()) { + for (Map.Entry failedInodeEntry : failedInodesToRetryNum.entrySet()) { + mTtlBuckets.insert(failedInodeEntry.getKey(), failedInodeEntry.getValue()); + } + } } @Override diff --git a/core/server/master/src/main/java/alluxio/master/file/LostFileDetector.java b/core/server/master/src/main/java/alluxio/master/file/LostFileDetector.java index 3eadbc4a5c25..9f25b8d8a857 100644 --- a/core/server/master/src/main/java/alluxio/master/file/LostFileDetector.java +++ b/core/server/master/src/main/java/alluxio/master/file/LostFileDetector.java @@ -59,9 +59,9 @@ public LostFileDetector(FileSystemMaster fileSystemMaster, BlockMaster blockMast } @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { Iterator iter = mBlockMaster.getLostBlocksIterator(); - Set markedFiles = new HashSet<>(); + Set toMarkFiles = new HashSet<>(); while (iter.hasNext()) { if (Thread.interrupted()) { throw new InterruptedException("LostFileDetector interrupted."); @@ -69,48 +69,52 @@ public void heartbeat() throws InterruptedException { long blockId = iter.next(); long containerId = BlockId.getContainerId(blockId); long fileId = IdUtils.createFileId(containerId); - if (markedFiles.contains(fileId)) { + if (toMarkFiles.contains(fileId)) { iter.remove(); continue; } - boolean markAsLost = false; try ( LockedInodePath inodePath = mInodeTree.lockFullInodePath(fileId, LockPattern.READ, NoopJournalContext.INSTANCE) ) { Inode inode = inodePath.getInode(); if (inode.getPersistenceState() != PersistenceState.PERSISTED) { - markAsLost = true; + toMarkFiles.add(fileId); } + iter.remove(); } catch (FileDoesNotExistException e) { LOG.debug("Exception trying to get inode from inode tree", e); iter.remove(); continue; } + } - if (markAsLost) { - // update the state - try (JournalContext journalContext = mFileSystemMaster.createJournalContext(); - LockedInodePath inodePath = - mInodeTree.lockFullInodePath(fileId, LockPattern.WRITE_INODE, journalContext)) { - Inode inode = inodePath.getInode(); - if (inode.getPersistenceState() != PersistenceState.PERSISTED) { - mInodeTree.updateInode(journalContext, - UpdateInodeEntry.newBuilder().setId(inode.getId()) - .setPersistenceState(PersistenceState.LOST.name()).build()); - markedFiles.add(fileId); + if (toMarkFiles.size() > 0) { + // Here the candidate block has been removed from the checklist + // But the journal entries have not yet been flushed + // If the journal entries are lost, we will never be able to mark them again, + // because the worker will never report those removedBlocks to the master again + // This is fine because the LOST status is purely for display now + try (JournalContext journalContext = mFileSystemMaster.createJournalContext()) { + // update the state on the 2nd pass + for (long fileId : toMarkFiles) { + try (LockedInodePath inodePath = mInodeTree.lockFullInodePath( + fileId, LockPattern.WRITE_INODE, journalContext)) { + Inode inode = inodePath.getInode(); + if (inode.getPersistenceState() != PersistenceState.PERSISTED) { + mInodeTree.updateInode(journalContext, + UpdateInodeEntry.newBuilder().setId(inode.getId()) + .setPersistenceState(PersistenceState.LOST.name()).build()); + toMarkFiles.add(fileId); + } + } catch (FileDoesNotExistException e) { + LOG.debug("Failed to mark file {} as lost. The inode does not exist anymore.", + fileId, e); } - iter.remove(); - } catch (FileDoesNotExistException e) { - LOG.debug("Failed to mark file {} as lost. 
The inode does not exist anymore.", - fileId, e); - iter.remove(); - } catch (UnavailableException e) { - LOG.warn("Failed to mark files LOST because the journal is not available. " - + "{} files are affected: {}", - markedFiles.size(), markedFiles, e); - break; } + } catch (UnavailableException e) { + LOG.error("Failed to mark files LOST because the journal is not available. " + + "{} files are affected: {}", toMarkFiles.size(), toMarkFiles, e); } } } diff --git a/core/server/master/src/main/java/alluxio/master/file/RpcContext.java b/core/server/master/src/main/java/alluxio/master/file/RpcContext.java index bc71ea478070..75e78bc04e0f 100644 --- a/core/server/master/src/main/java/alluxio/master/file/RpcContext.java +++ b/core/server/master/src/main/java/alluxio/master/file/RpcContext.java @@ -37,7 +37,7 @@ * guarantees about the order in which resources are closed. */ @NotThreadSafe -public final class RpcContext implements Closeable, Supplier { +public class RpcContext implements Closeable, Supplier { public static final RpcContext NOOP = new RpcContext(NoopBlockDeletionContext.INSTANCE, NoopJournalContext.INSTANCE, new InternalOperationContext()); diff --git a/core/server/master/src/main/java/alluxio/master/file/UfsCleaner.java b/core/server/master/src/main/java/alluxio/master/file/UfsCleaner.java index bc9ab0ab6ef4..5d1261bff807 100644 --- a/core/server/master/src/main/java/alluxio/master/file/UfsCleaner.java +++ b/core/server/master/src/main/java/alluxio/master/file/UfsCleaner.java @@ -30,7 +30,7 @@ public UfsCleaner(FileSystemMaster fileSystemMaster) { } @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { mFileSystemMaster.cleanupUfs(); } diff --git a/core/server/master/src/main/java/alluxio/master/file/UfsSyncChecker.java b/core/server/master/src/main/java/alluxio/master/file/UfsSyncChecker.java index 118b0e917724..b00eb3796395 100644 --- a/core/server/master/src/main/java/alluxio/master/file/UfsSyncChecker.java +++ b/core/server/master/src/main/java/alluxio/master/file/UfsSyncChecker.java @@ -160,7 +160,7 @@ private UfsStatus[] getChildrenInUFS(AlluxioURI alluxioUri) childrenList.add(newStatus); } } - return trimIndirect(childrenList.toArray(new UfsStatus[childrenList.size()])); + return trimIndirect(childrenList.toArray(new UfsStatus[0])); } curUri = curUri.getParent(); } @@ -189,6 +189,6 @@ private UfsStatus[] trimIndirect(UfsStatus[] children) { childrenList.add(child); } } - return childrenList.toArray(new UfsStatus[childrenList.size()]); + return childrenList.toArray(new UfsStatus[0]); } } diff --git a/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncManager.java b/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncManager.java index 1d038633fbb3..6993b31027dd 100644 --- a/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncManager.java +++ b/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncManager.java @@ -17,6 +17,7 @@ import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.InvalidPathException; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatThread; import alluxio.master.file.FileSystemMaster; @@ -262,7 +263,8 @@ public void launchPollingThread(long mountId, long txId) { ActiveSyncer syncer = new ActiveSyncer(mFileSystemMaster, this, mMountTable, mountId); Future future = getExecutor().submit( new 
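// A sketch (not part of this patch) of the two-pass structure the LostFileDetector change
// above introduces: pass one scans lost blocks and only collects candidate file ids; pass
// two opens a single journal context and marks the whole batch, so an unavailable journal
// fails once per heartbeat instead of once per file. All names here are hypothetical.
import java.util.HashSet;
import java.util.Set;

public final class TwoPassMarkSketch {
  interface Journal extends AutoCloseable {
    void write(String entry);
    @Override void close();
  }

  public static void main(String[] args) {
    Set<Long> toMark = new HashSet<>();
    for (long blockId : new long[] {7L, 8L}) { // pass 1: collect only, no journal held
      toMark.add(blockId >> 8);                // toy block -> file id mapping; both blocks
    }                                          // belong to one file, so it is marked once
    Journal journal = new Journal() {          // pass 2: one context for the whole batch
      @Override public void write(String entry) { System.out.println("journal: " + entry); }
      @Override public void close() { System.out.println("flushed"); }
    };
    try (Journal j = journal) {
      for (long fileId : toMark) {
        j.write("mark LOST fileId=" + fileId);
      }
    }
  }
}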
                HeartbeatThread(HeartbeatContext.MASTER_ACTIVE_UFS_SYNC,
-            syncer, (int) Configuration.getMs(PropertyKey.MASTER_UFS_ACTIVE_SYNC_INTERVAL),
+            syncer, () -> new FixedIntervalSupplier(
+                Configuration.getMs(PropertyKey.MASTER_UFS_ACTIVE_SYNC_INTERVAL)),
             Configuration.global(), ServerUserState.global()));
     mPollerMap.put(mountId, future);
   }
diff --git a/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncer.java b/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncer.java
index 666da9434682..e9ba8aebec3c 100644
--- a/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncer.java
+++ b/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncer.java
@@ -74,7 +74,7 @@ public ActiveSyncer(FileSystemMaster fileSystemMaster, ActiveSyncManager syncMan
   }

   @Override
-  public void heartbeat() {
+  public void heartbeat(long timeLimitMs) {
     LOG.debug("start sync heartbeat for {} with mount id {}", mMountUri, mMountId);
     // Remove any previously completed sync tasks
     mSyncTasks.removeIf(Future::isDone);
diff --git a/core/server/master/src/main/java/alluxio/master/file/contexts/CreateFileContext.java b/core/server/master/src/main/java/alluxio/master/file/contexts/CreateFileContext.java
index 8de11ea14bf2..97bcd9ccd775 100644
--- a/core/server/master/src/main/java/alluxio/master/file/contexts/CreateFileContext.java
+++ b/core/server/master/src/main/java/alluxio/master/file/contexts/CreateFileContext.java
@@ -18,14 +18,67 @@
 import com.google.common.base.MoreObjects;

+import java.util.List;
+import javax.annotation.Nullable;
+
 /**
  * Implementation of {@link OperationContext} used to merge and wrap {@link CreateFilePOptions}.
  */
 public class CreateFileContext
     extends CreatePathContext<CreateFilePOptions.Builder, CreateFileContext> {

+  /**
+   * Holds the information needed to mark a file as complete at creation time.
+   */
+  public static class CompleteFileInfo {
+    /**
+     * Constructs an instance.
+     * @param containerId the file container id
+     * @param length the file size
+     * @param blockIds the block ids in the file
+     */
+    public CompleteFileInfo(long containerId, long length, List<Long> blockIds) {
+      mBlockIds = blockIds;
+      mContainerId = containerId;
+      mLength = length;
+    }
+
+    /**
+     * If set, the new file will use this id instead of a generated one when the file is created.
+     */
+    private final long mContainerId;
+    private final long mLength;
+    private final List<Long> mBlockIds;
+
+    /**
+     * @return the container id
+     */
+    public long getContainerId() {
+      return mContainerId;
+    }
+
+    /**
+     * @return the file length
+     */
+    public long getLength() {
+      return mLength;
+    }
+
+    /**
+     * @return the block ids in the file
+     */
+    public List<Long> getBlockIds() {
+      return mBlockIds;
+    }
+  }

   private boolean mCacheable;
+  /**
+   * If set, the file will be marked as completed when it gets created in the inode tree.
+   * Used in metadata sync.
+   */
+  @Nullable private CompleteFileInfo mCompleteFileInfo;
+
   /**
    * Creates context with given option data.
    *
@@ -34,6 +87,7 @@ private CreateFileContext(CreateFilePOptions.Builder optionsBuilder) {
     super(optionsBuilder);
     mCacheable = false;
+    mCompleteFileInfo = null;
   }

   /**
@@ -58,6 +112,15 @@ public static CreateFileContext mergeFrom(CreateFilePOptions.Builder optionsBuil
     return new CreateFileContext(mergedOptionsBuilder);
   }

+  /**
+   * Merges and creates a CreateFileContext.
+ * @param optionsBuilder the options builder template + * @return the context + */ + public static CreateFileContext mergeFromDefault(CreateFilePOptions optionsBuilder) { + return new CreateFileContext(CreateFilePOptions.newBuilder().mergeFrom(optionsBuilder)); + } + /** * @return the instance of {@link CreateFileContext} with default values for master */ @@ -90,11 +153,33 @@ public OperationId getOperationId() { return super.getOperationId(); } + /** + * @param completeFileInfo if the file is expected to mark as completed when it is created + * @return the updated context object + */ + public CreateFileContext setCompleteFileInfo(CompleteFileInfo completeFileInfo) { + mCompleteFileInfo = completeFileInfo; + return getThis(); + } + + /** + * @return the complete file info object + */ + public CompleteFileInfo getCompleteFileInfo() { + return mCompleteFileInfo; + } + @Override public String toString() { - return MoreObjects.toStringHelper(this) + MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this) .add("PathContext", super.toString()) - .add("Cacheable", mCacheable) - .toString(); + .add("Cacheable", mCacheable); + + if (mCompleteFileInfo != null) { + helper.add("Length", mCompleteFileInfo.getLength()) + .add("IsCompleted", true) + .add("BlockContainerId", mCompleteFileInfo.getContainerId()); + } + return helper.toString(); } } diff --git a/core/server/master/src/main/java/alluxio/master/file/contexts/CreatePathContext.java b/core/server/master/src/main/java/alluxio/master/file/contexts/CreatePathContext.java index bf8719fcc8d3..730f6e050608 100644 --- a/core/server/master/src/main/java/alluxio/master/file/contexts/CreatePathContext.java +++ b/core/server/master/src/main/java/alluxio/master/file/contexts/CreatePathContext.java @@ -11,6 +11,7 @@ package alluxio.master.file.contexts; +import alluxio.Constants; import alluxio.client.WriteType; import alluxio.conf.Configuration; import alluxio.grpc.CreateDirectoryPOptions; @@ -24,12 +25,14 @@ import alluxio.util.SecurityUtils; import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.protobuf.GeneratedMessageV3; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.function.Supplier; import javax.annotation.Nullable; /** @@ -48,9 +51,11 @@ public abstract class CreatePathContext, protected String mOwner; protected String mGroup; protected boolean mMetadataLoad; + protected boolean mPersistNonExistingParentDirectories = true; private WriteType mWriteType; protected Map mXAttr; protected XAttrPropagationStrategy mXAttrPropStrat; + @Nullable protected Supplier mMissingDirFingerprint = null; // // Values for the below fields will be extracted from given proto options @@ -64,6 +69,7 @@ public abstract class CreatePathContext, protected boolean mRecursive; protected long mTtl; protected TtlAction mTtlAction; + @Nullable protected String mFingerprint; /** * Creates context with given option data. 
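// A sketch (not part of this patch) of the pattern CompleteFileInfo enables above: the
// create context optionally carries the final length and block ids so that metadata sync
// can create a file that is already complete, instead of journaling a separate create and
// complete step. The FileMeta class is a hypothetical stand-in for the inode state.
import java.util.Arrays;
import java.util.List;

public final class CreateCompletedSketch {
  static final class FileMeta {
    long mLength = -1;
    List<Long> mBlockIds = null;
    boolean mCompleted = false;
  }

  static FileMeta create(long length, List<Long> blockIds, boolean completeOnCreate) {
    FileMeta f = new FileMeta();
    if (completeOnCreate) { // one step: no separate "complete" journal entry needed
      f.mLength = length;
      f.mBlockIds = blockIds;
      f.mCompleted = true;
    }
    return f;
  }

  public static void main(String[] args) {
    FileMeta f = create(128L, Arrays.asList(1L, 2L), true);
    System.out.println("completed=" + f.mCompleted + " length=" + f.mLength);
  }
}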
@@ -78,6 +84,7 @@ protected CreatePathContext(T optionsBuilder) { mMetadataLoad = false; mGroup = ""; mOwner = ""; + mFingerprint = null; if (SecurityUtils.isAuthenticationEnabled(Configuration.global())) { mOwner = SecurityUtils.getOwnerFromGrpcClient(Configuration.global()); mGroup = SecurityUtils.getGroupFromGrpcClient(Configuration.global()); @@ -269,12 +276,18 @@ public String getGroup() { } /** - * @param metadataLoad the flag value to use; if true, the create path is a result of a metadata + * @param metadataLoad the flag value to use; if true, the created path is a result of a metadata * load + * @param persistNonExistingParentDirectories if true any non-existing parent directories + * will also be created on the UFS (this can only be + * set to false if metadataLoad is set to true) * @return the updated context */ - public K setMetadataLoad(boolean metadataLoad) { + public K setMetadataLoad( + boolean metadataLoad, boolean persistNonExistingParentDirectories) { + Preconditions.checkState(metadataLoad || persistNonExistingParentDirectories); mMetadataLoad = metadataLoad; + mPersistNonExistingParentDirectories = persistNonExistingParentDirectories; return getThis(); } @@ -295,6 +308,40 @@ public K setXAttr(@Nullable Map xattr) { return getThis(); } + /** + * @return the fingerprint + */ + @Nullable + public String getFingerprint() { + return mFingerprint; + } + + /** + * @param fingerprint the fingerprint + * @return the updated context + */ + public K setFingerprint(String fingerprint) { + mFingerprint = fingerprint; + return getThis(); + } + + /** + * @return the fingerprint for missing directories + */ + public String getMissingDirFingerprint() { + return mMissingDirFingerprint == null + ? Constants.INVALID_UFS_FINGERPRINT : mMissingDirFingerprint.get(); + } + + /** + * @param fingerprint the fingerprint to be used when creating missing nested directories + * @return the updated context + */ + public K setMissingDirFingerprint(Supplier fingerprint) { + mMissingDirFingerprint = fingerprint; + return getThis(); + } + /** * @return extended attributes propagation strategy of this context */ @@ -302,6 +349,14 @@ public XAttrPropagationStrategy getXAttrPropStrat() { return mXAttrPropStrat; } + /** + * @return true if non-existing parent directories should be persisted, + * can only be false if the metadataLoad flag is true + */ + public boolean isPersistNonExistingParentDirectories() { + return mPersistNonExistingParentDirectories; + } + /** * @return the metadataLoad flag; if true, the create path is a result of a metadata load */ @@ -320,6 +375,7 @@ public String toString() { .add("MetadataLoad", mMetadataLoad) .add("writeType", mWriteType) .add("xattr", mXAttr) + .add("Fingerprint", mFingerprint) .toString(); } } diff --git a/core/server/master/src/main/java/alluxio/master/file/contexts/DeleteContext.java b/core/server/master/src/main/java/alluxio/master/file/contexts/DeleteContext.java index 3bb640bc7cd6..6129b26dbe45 100644 --- a/core/server/master/src/main/java/alluxio/master/file/contexts/DeleteContext.java +++ b/core/server/master/src/main/java/alluxio/master/file/contexts/DeleteContext.java @@ -23,6 +23,7 @@ */ public class DeleteContext extends OperationContext { private boolean mMetadataLoad = false; + private boolean mSkipNotPersisted = false; /** * Creates context with given option data. 
@@ -81,6 +82,22 @@ public DeleteContext setMetadataLoad(boolean metadataLoad) { return this; } + /** + * @param skipNotPersisted if true non-completed, or non-persisted files will be skipped + * @return the updated context + */ + public DeleteContext skipNotPersisted(boolean skipNotPersisted) { + mSkipNotPersisted = skipNotPersisted; + return this; + } + + /** + * @return true if the deletion should skip non-completed, or non-persisted files + */ + public boolean isSkipNotPersisted() { + return mSkipNotPersisted; + } + /** * @return the metadataLoad flag; if true, the operation is a result of a metadata load */ diff --git a/core/server/master/src/main/java/alluxio/master/file/contexts/ListStatusContext.java b/core/server/master/src/main/java/alluxio/master/file/contexts/ListStatusContext.java index 19b7bc05fef9..9f633c6ecaa7 100644 --- a/core/server/master/src/main/java/alluxio/master/file/contexts/ListStatusContext.java +++ b/core/server/master/src/main/java/alluxio/master/file/contexts/ListStatusContext.java @@ -12,10 +12,13 @@ package alluxio.master.file.contexts; import alluxio.conf.Configuration; +import alluxio.grpc.FileSystemMasterCommonPOptions; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; +import alluxio.grpc.LoadMetadataPType; import alluxio.util.FileSystemOptionsUtils; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.MoreObjects; import java.util.Optional; @@ -32,6 +35,7 @@ public class ListStatusContext private boolean mDoneListing = false; private long mTotalListings; private final ListStatusPartialPOptions.Builder mPartialPOptions; + private boolean mDisableMetadataSync = false; /** * @@ -41,6 +45,27 @@ public Optional getPartialOptions() { return Optional.ofNullable(mPartialPOptions); } + /** + * Set to true to disable metadata sync. + * @return the context + */ + @VisibleForTesting + public ListStatusContext disableMetadataSync() { + mDisableMetadataSync = true; + getOptions().setLoadMetadataType(LoadMetadataPType.NEVER) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() + .setSyncIntervalMs(-1).mergeFrom( + getOptions().getCommonOptions()).buildPartial()); + return this; + } + + /** + * @return true if metadata sync has been disabled for this operation + */ + public boolean isDisableMetadataSync() { + return mDisableMetadataSync; + } + /** * Creates context with given option data. * diff --git a/core/server/master/src/main/java/alluxio/master/file/contexts/SyncMetadataContext.java b/core/server/master/src/main/java/alluxio/master/file/contexts/SyncMetadataContext.java new file mode 100644 index 000000000000..398b680edaea --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/contexts/SyncMetadataContext.java @@ -0,0 +1,73 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
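// A sketch (not part of this patch) of what disableMetadataSync() above amounts to: force
// the load-metadata type to NEVER and the sync interval to -1 while preserving whatever
// other options the caller already set. Plain fields stand in for the protobuf builders.
public final class DisableSyncSketch {
  enum LoadMetadataType { ONCE, ALWAYS, NEVER }

  static final class Options {
    LoadMetadataType mLoadType = LoadMetadataType.ONCE;
    long mSyncIntervalMs = 30_000;
    boolean mRecursive = false; // an unrelated option that must survive the merge
  }

  static Options disableMetadataSync(Options o) {
    o.mLoadType = LoadMetadataType.NEVER; // never pull metadata from the UFS
    o.mSyncIntervalMs = -1;               // a negative interval means "do not sync"
    return o;
  }

  public static void main(String[] args) {
    Options o = new Options();
    o.mRecursive = true;
    disableMetadataSync(o);
    System.out.println(o.mLoadType + " " + o.mSyncIntervalMs + " recursive=" + o.mRecursive);
  }
}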
+ */
+
+package alluxio.master.file.contexts;
+
+import alluxio.conf.Configuration;
+import alluxio.grpc.SyncMetadataPOptions;
+import alluxio.util.FileSystemOptionsUtils;
+
+import com.google.common.base.MoreObjects;
+
+/**
+ * Used to merge and wrap {@link SyncMetadataPOptions}.
+ */
+public class SyncMetadataContext
+    extends OperationContext<SyncMetadataPOptions.Builder, SyncMetadataContext> {
+
+  /**
+   * Creates context with given option data.
+   *
+   * @param optionsBuilder options builder
+   */
+  private SyncMetadataContext(SyncMetadataPOptions.Builder optionsBuilder) {
+    super(optionsBuilder);
+  }
+
+  /**
+   * @param optionsBuilder Builder for proto {@link SyncMetadataPOptions}
+   * @return the instance of {@link SyncMetadataContext} with given options
+   */
+  public static SyncMetadataContext create(SyncMetadataPOptions.Builder optionsBuilder) {
+    return new SyncMetadataContext(optionsBuilder);
+  }
+
+  /**
+   * Merges and embeds the given {@link SyncMetadataPOptions} with the corresponding master
+   * options.
+   *
+   * @param optionsBuilder Builder for proto {@link SyncMetadataPOptions} to merge with defaults
+   * @return the instance of {@link SyncMetadataContext} with default values for master
+   */
+  public static SyncMetadataContext mergeFrom(SyncMetadataPOptions.Builder optionsBuilder) {
+    SyncMetadataPOptions masterOptions =
+        FileSystemOptionsUtils.syncMetadataDefaults(Configuration.global());
+    SyncMetadataPOptions.Builder mergedOptionsBuilder =
+        masterOptions.toBuilder().mergeFrom(optionsBuilder.build());
+    return create(mergedOptionsBuilder);
+  }
+
+  /**
+   * @return the instance of {@link SyncMetadataContext} with default values for master
+   */
+  public static SyncMetadataContext defaults() {
+    return create(FileSystemOptionsUtils
+        .syncMetadataDefaults(Configuration.global()).toBuilder());
+  }
+
+  @Override
+  public String toString() {
+    return MoreObjects.toStringHelper(this)
+        .add("ProtoOptions", getOptions().build())
+        .toString();
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTask.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTask.java
new file mode 100644
index 000000000000..1006baf01126
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTask.java
@@ -0,0 +1,314 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.collections.Pair; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.DeadlineExceededRuntimeException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.exception.status.CancelledException; +import alluxio.exception.status.UnavailableException; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.SyncMetadataState; +import alluxio.grpc.SyncMetadataTask; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.LockedInodePath; +import alluxio.master.journal.JournalContext; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; +import alluxio.util.CommonUtils; +import alluxio.util.ExceptionUtils; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.base.Stopwatch; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; + +/** + * This is the overall task for a sync operation. + */ +public abstract class BaseTask implements PathWaiter { + enum State { + RUNNING, + SUCCEEDED, + FAILED, + CANCELED; + + SyncMetadataState toProto() { + switch (this) { + case RUNNING: + return SyncMetadataState.RUNNING; + case SUCCEEDED: + return SyncMetadataState.SUCCEEDED; + case FAILED: + return SyncMetadataState.FAILED; + case CANCELED: + return SyncMetadataState.CANCELED; + default: + return SyncMetadataState.UNKNOWN; + } + } + } + + private static final Logger LOG = LoggerFactory.getLogger(BaseTask.class); + + private final long mStartTime; + private volatile Long mFinishTime = null; + BaseTaskResult mIsCompleted = null; + private final TaskInfo mTaskInfo; + private final PathLoaderTask mPathLoadTask; + private final boolean mRemoveOnComplete; + + /** + * @return the task state + */ + public synchronized State getState() { + if (!isCompleted().isPresent()) { + return State.RUNNING; + } + BaseTaskResult result = isCompleted().get(); + if (result.succeeded()) { + return State.SUCCEEDED; + } else if (result.getThrowable().orElse(null) instanceof CancelledException) { + return State.CANCELED; + } else { + return State.FAILED; + } + } + + /** + * @return true if the task is completed + */ + public synchronized Optional isCompleted() { + return Optional.ofNullable(mIsCompleted); + } + + /** + * @return if the task is succeeded + */ + public synchronized boolean succeeded() { + return mIsCompleted != null && mIsCompleted.succeeded(); + } + + @VisibleForTesting + PathLoaderTask getPathLoadTask() { + return mPathLoadTask; + } + + static BaseTask create( + TaskInfo info, long startTime, + Function> clientSupplier, + boolean removeOnComplete) { + if (info.getLoadByDirectory() != DirectoryLoadType.SINGLE_LISTING + && info.getDescendantType() == DescendantType.ALL) { + return new DirectoryPathWaiter( + info, startTime, clientSupplier, removeOnComplete); + } else { + return new BatchPathWaiter( + info, startTime, clientSupplier, removeOnComplete); + } + } + + static BaseTask create( + TaskInfo info, long startTime, + Function> clientSupplier) { + return create(info, startTime, clientSupplier, true); + } + + BaseTask( + TaskInfo info, long startTime, + Function> clientSupplier, boolean 
removeOnComplete) {
+    mTaskInfo = info;
+    mStartTime = startTime;
+    mPathLoadTask = new PathLoaderTask(mTaskInfo, null, clientSupplier);
+    mRemoveOnComplete = removeOnComplete;
+  }
+
+  /**
+   * @return the task info
+   */
+  public TaskInfo getTaskInfo() {
+    return mTaskInfo;
+  }
+
+  /**
+   * @return true if the task should be removed on completion; otherwise it will be
+   *         moved to a completed task cache
+   */
+  boolean removeOnComplete() {
+    return mRemoveOnComplete;
+  }
+
+  /**
+   * @return the start time of the task in ms (only valid once the task has completed)
+   */
+  public synchronized long getStartTime() {
+    Preconditions.checkState(mIsCompleted != null,
+        "Task must be completed before accessing the start time");
+    return mStartTime;
+  }
+
+  PathLoaderTask getLoadTask() {
+    return mPathLoadTask;
+  }
+
+  synchronized void onComplete(
+      boolean isFile, DefaultFileSystemMaster fileSystemMaster, InodeTree inodeTree) {
+    if (mIsCompleted != null) {
+      return;
+    }
+    updateDirectChildrenLoaded(fileSystemMaster, inodeTree);
+    mFinishTime = CommonUtils.getCurrentMs();
+    mIsCompleted = new BaseTaskResult(null);
+    mTaskInfo.getMdSync().onTaskComplete(mTaskInfo.getId(), isFile);
+    notifyAll();
+  }
+
+  /**
+   * Blocks until the task completes.
+   * If the task fails, the exception causing the failure is thrown.
+   * If the wait times out, a {@link DeadlineExceededRuntimeException} is thrown.
+   *
+   * @param timeoutMs the timeout in ms, 0 for an endless wait
+   */
+  public synchronized void waitComplete(long timeoutMs) throws Throwable {
+    Stopwatch sw = Stopwatch.createStarted();
+    long waitTime = timeoutMs;
+    while (mIsCompleted == null && (timeoutMs == 0 || waitTime > 0)) {
+      wait(waitTime);
+      if (timeoutMs != 0) {
+        waitTime = waitTime - sw.elapsed(TimeUnit.MILLISECONDS);
+        // restart (not just zero) the stopwatch so the next iteration measures the time
+        // spent in the next wait(); a stopped stopwatch would always read zero
+        sw.reset().start();
+      }
+    }
+    if (mIsCompleted == null) {
+      throw new DeadlineExceededRuntimeException("Task still running.");
+    }
+    if (mIsCompleted.getThrowable().isPresent()) {
+      throw mIsCompleted.getThrowable().get();
+    }
+  }
+
+  synchronized void onFailed(Throwable t) {
+    mFinishTime = CommonUtils.getCurrentMs();
+    if (mIsCompleted != null) {
+      return;
+    }
+    mIsCompleted = new BaseTaskResult(t);
+    LOG.warn("Task {} failed with error", mTaskInfo, t);
+    cancel();
+    mTaskInfo.getMdSync().onTaskError(mTaskInfo.getId(), t);
+  }
+
+  synchronized long cancel() {
+    mFinishTime = CommonUtils.getCurrentMs();
+    if (mIsCompleted == null) {
+      mIsCompleted = new BaseTaskResult(new CancelledException("Task was cancelled"));
+    }
+    mPathLoadTask.cancel();
+    notifyAll();
+    return mTaskInfo.getId();
+  }
+
+  boolean pathIsCovered(AlluxioURI path, DescendantType depth) {
+    switch (mTaskInfo.getDescendantType()) {
+      case NONE:
+        return depth == DescendantType.NONE && mTaskInfo.getBasePath().equals(path);
+      case ONE:
+        return (depth != DescendantType.ALL && mTaskInfo.getBasePath().equals(path))
+            || (depth == DescendantType.NONE && mTaskInfo.getBasePath().equals(path.getParent()));
+      case ALL:
+        try {
+          return mTaskInfo.getBasePath().isAncestorOf(path);
+        } catch (InvalidPathException e) {
+          throw new InternalRuntimeException(e);
+        }
+      default:
+        throw new InternalRuntimeException(String.format(
+            "Unknown descendant type %s", mTaskInfo.getDescendantType()));
+    }
+  }
+
+  /**
+   * @return the sync duration in ms
+   */
+  public long getSyncDuration() {
+    final Long finishTime = mFinishTime;
+    if (finishTime == null) {
+      return CommonUtils.getCurrentMs() - mStartTime;
+    }
+    return finishTime - mStartTime;
+  }
+
+  /**
+   * @return the sync metadata task in proto
+   */
+  public synchronized
SyncMetadataTask toProtoTask() { + SyncMetadataTask.Builder builder = SyncMetadataTask.newBuilder(); + builder.setId(getTaskInfo().getId()); + builder.setState(getState().toProto()); + builder.setSyncDurationMs(getSyncDuration()); + Throwable t = null; + if (mIsCompleted != null && mIsCompleted.getThrowable().isPresent()) { + t = mIsCompleted.getThrowable().get(); + } + if (t != null && getState() != State.CANCELED) { + builder.setException(SyncMetadataTask.Exception.newBuilder() + .setExceptionType(t.getClass().getTypeName()) + .setExceptionMessage(t.getMessage() == null ? "" : t.getMessage()) + .setStacktrace(ExceptionUtils.asPlainText(t))); + } + builder.setTaskInfoString(getTaskInfo().toString()); + Pair statReport = getTaskInfo().getStats().toReportString(); + builder.setSuccessOpCount(statReport.getFirst()); + builder.setTaskStatString(statReport.getSecond()); + return builder.build(); + } + + /** + * Updates direct children loaded for directories affected by the metadata sync. + * @param fileSystemMaster the file system master + * @param inodeTree the inode tree + */ + public void updateDirectChildrenLoaded( + DefaultFileSystemMaster fileSystemMaster, InodeTree inodeTree) { + try (JournalContext journalContext = fileSystemMaster.createJournalContext()) { + getTaskInfo().getPathsToUpdateDirectChildrenLoaded().forEach( + uri -> { + try (LockedInodePath lockedInodePath = + inodeTree.lockInodePath( + uri, InodeTree.LockPattern.WRITE_INODE, + journalContext)) { + if (lockedInodePath.fullPathExists() && lockedInodePath.getInode().isDirectory() + && !lockedInodePath.getInode().asDirectory().isDirectChildrenLoaded()) { + inodeTree.setDirectChildrenLoaded( + () -> journalContext, + lockedInodePath.getInode().asDirectory()); + } + } catch (FileDoesNotExistException | InvalidPathException e) { + throw new RuntimeException(e); + } + }); + } catch (UnavailableException e) { + throw new RuntimeException(e); + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTaskResult.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTaskResult.java new file mode 100644 index 000000000000..3e5b32cb5f91 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTaskResult.java @@ -0,0 +1,35 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import java.util.Optional; +import javax.annotation.Nullable; + +/** + * The overall result of a base task. 
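// A sketch (not part of this patch) of the bounded wait()/notifyAll() loop that
// waitComplete above implements, written against an absolute deadline so that spurious
// wakeups cannot extend the timeout and no stopwatch bookkeeping is needed. A timeout of
// 0 means wait indefinitely, matching the convention above. Names here are hypothetical.
public final class DeadlineWaitSketch {
  private Object mResult; // guarded by "this"

  public synchronized Object waitComplete(long timeoutMs) throws InterruptedException {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (mResult == null) {
      if (timeoutMs == 0) {
        wait();
        continue;
      }
      long remaining = deadline - System.currentTimeMillis();
      if (remaining <= 0) {
        throw new IllegalStateException("Task still running.");
      }
      wait(remaining);
    }
    return mResult;
  }

  public synchronized void complete(Object result) {
    mResult = result;
    notifyAll();
  }

  public static void main(String[] args) throws Exception {
    DeadlineWaitSketch t = new DeadlineWaitSketch();
    new Thread(() -> t.complete("done")).start();
    System.out.println(t.waitComplete(5_000));
  }
}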
+ */ +public class BaseTaskResult { + + private final Throwable mT; + + BaseTaskResult(@Nullable Throwable t) { + mT = t; + } + + boolean succeeded() { + return mT == null; + } + + Optional getThrowable() { + return Optional.ofNullable(mT); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/BatchPathWaiter.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/BatchPathWaiter.java new file mode 100644 index 000000000000..00f285a57225 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/BatchPathWaiter.java @@ -0,0 +1,103 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.function.Function; + +class BatchPathWaiter extends BaseTask { + private static final Logger LOG = LoggerFactory.getLogger(BatchPathWaiter.class); + private static final AlluxioURI EMPTY = new AlluxioURI(""); + + final List mLastCompleted; + final PathSequence mNoneCompleted; + + BatchPathWaiter( + TaskInfo info, long startTime, + Function> clientSupplier, + boolean removeOnComplete) { + super(info, startTime, clientSupplier, removeOnComplete); + mNoneCompleted = new PathSequence(EMPTY, info.getAlluxioPath()); + mLastCompleted = Lists.newArrayList(mNoneCompleted); + } + + @VisibleForTesting + List getLastCompleted() { + return mLastCompleted; + } + + @Override + public synchronized boolean waitForSync(AlluxioURI path) { + while (true) { + if (mIsCompleted != null) { + return mIsCompleted.succeeded(); + } + PathSequence minCompleted = mLastCompleted.get(0); + if (minCompleted != mNoneCompleted) { + if (minCompleted.getStart().compareTo(path) <= 0 + && minCompleted.getEnd().compareTo(path) > 0) { + return true; + } + } + try { + wait(); + } catch (InterruptedException e) { + LOG.debug("Interrupted while waiting for synced path {}", path); + return false; + } + } + } + + @Override + public synchronized void nextCompleted(SyncProcessResult completed) { + if (!completed.getLoaded().isPresent()) { + return; + } + PathSequence loaded = completed.getLoaded().get(); + AlluxioURI newRight = null; + AlluxioURI newLeft = null; + int i = 0; + for (; i < mLastCompleted.size(); i++) { + int rightCmp = mLastCompleted.get(i).getStart().compareTo(loaded.getEnd()); + if (rightCmp == 0) { + newRight = mLastCompleted.get(i).getEnd(); + } + if (rightCmp >= 0) { + break; + } + int leftCmp = mLastCompleted.get(i).getEnd().compareTo(loaded.getStart()); + if (leftCmp == 0) { + newLeft = mLastCompleted.get(i).getStart(); + } + } + if (newRight == null && newLeft == null) { + mLastCompleted.add(i, loaded); + } else if (newRight != null && newLeft != null) { + mLastCompleted.set(i, new PathSequence(newLeft, newRight)); + mLastCompleted.remove(i - 1); + } else if (newLeft != 
null) { + mLastCompleted.set(i - 1, new PathSequence(newLeft, loaded.getEnd())); + } else { + mLastCompleted.set(i, new PathSequence(loaded.getStart(), newRight)); + } + notifyAll(); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/DefaultSyncProcess.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/DefaultSyncProcess.java new file mode 100644 index 000000000000..f117eaf6433d --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/DefaultSyncProcess.java @@ -0,0 +1,973 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.client.WriteType; +import alluxio.collections.Pair; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.AccessControlException; +import alluxio.exception.BlockInfoException; +import alluxio.exception.DirectoryNotEmptyException; +import alluxio.exception.ExceptionMessage; +import alluxio.exception.FileAlreadyExistsException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.exception.runtime.InvalidArgumentRuntimeException; +import alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.CreateFilePOptions; +import alluxio.grpc.DeletePOptions; +import alluxio.grpc.FileSystemMasterCommonPOptions; +import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.TtlAction; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.CreateFileContext; +import alluxio.master.file.contexts.DeleteContext; +import alluxio.master.file.contexts.InternalOperationContext; +import alluxio.master.file.contexts.SetAttributeContext; +import alluxio.master.file.meta.Inode; +import alluxio.master.file.meta.InodeFile; +import alluxio.master.file.meta.InodeIterationResult; +import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.LockedInodePath; +import alluxio.master.file.meta.LockingScheme; +import alluxio.master.file.meta.MountTable; +import alluxio.master.file.meta.UfsAbsentPathCache; +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.master.file.meta.UfsSyncUtils; +import alluxio.master.file.meta.options.MountInfo; +import alluxio.master.metastore.ReadOnlyInodeStore; +import alluxio.master.metastore.ReadOption; +import alluxio.master.metastore.SkippableInodeIterator; +import alluxio.resource.CloseableResource; +import alluxio.security.authorization.Mode; +import alluxio.underfs.Fingerprint; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsDirectoryStatus; +import alluxio.underfs.UfsFileStatus; +import alluxio.underfs.UfsManager; +import alluxio.underfs.UfsStatus; +import alluxio.underfs.UnderFileSystem; +import alluxio.util.CommonUtils; +import 
alluxio.util.FileSystemOptionsUtils; +import alluxio.util.IteratorUtils; +import alluxio.util.io.PathUtils; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.collect.Iterators; +import com.google.common.collect.PeekingIterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Stream; +import javax.annotation.Nullable; + +/** + * The default metadata sync processor. + */ +public class DefaultSyncProcess implements SyncProcess { + /** + * the mount point not found runtime exception. + */ + public static class MountPointNotFoundRuntimeException extends NotFoundRuntimeException { + /** + * Creates the instance. + * @param message the error message + */ + public MountPointNotFoundRuntimeException(String message) { + super(message); + } + } + + public static final FileSystemMasterCommonPOptions NO_TTL_OPTION = + FileSystemMasterCommonPOptions.newBuilder() + .setTtl(-1) + .setTtlAction( + Configuration.getEnum(PropertyKey.USER_FILE_CREATE_TTL_ACTION, TtlAction.class)) + .build(); + private static final Logger LOG = LoggerFactory.getLogger(DefaultSyncProcess.class); + private final DefaultFileSystemMaster mFsMaster; + private final ReadOnlyInodeStore mInodeStore; + private final MountTable mMountTable; + private final InodeTree mInodeTree; + + private final TaskTracker mTaskTracker; + private final MetadataSyncHandler mMetadataSyncHandler; + private final boolean mIgnoreTTL = + Configuration.getBoolean(PropertyKey.MASTER_METADATA_SYNC_IGNORE_TTL); + private final CreateFilePOptions mCreateFilePOptions = + FileSystemOptionsUtils.createFileDefaults(Configuration.global(), false).toBuilder().build(); + + private final Cache mTaskGroupMap = + CacheBuilder.newBuilder().maximumSize(1000).build(); + private final AtomicLong mTaskGroupIds = new AtomicLong(0); + + private final UfsAbsentPathCache mUfsAbsentCache; + + /** + * Constructs a default metadata sync processor. 
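// A sketch (not part of this patch) of the interval bookkeeping in
// BatchPathWaiter.nextCompleted above: completed batches arrive as [start, end) ranges,
// touching ranges are coalesced, and a waiter for point p is released once p falls inside
// the first completed range. Longs stand in for AlluxioURIs, which are totally ordered.
import java.util.ArrayList;
import java.util.List;

public final class CompletedRangesSketch {
  private final List<long[]> mRanges = new ArrayList<>(); // sorted, disjoint [start, end)

  void markCompleted(long start, long end) {
    int i = 0;
    while (i < mRanges.size() && mRanges.get(i)[1] < start) {
      i++;
    }
    // coalesce every existing range touching [start, end)
    while (i < mRanges.size() && mRanges.get(i)[0] <= end) {
      start = Math.min(start, mRanges.get(i)[0]);
      end = Math.max(end, mRanges.get(i)[1]);
      mRanges.remove(i);
    }
    mRanges.add(i, new long[] {start, end});
  }

  boolean isSynced(long p) {
    return !mRanges.isEmpty() && mRanges.get(0)[0] <= p && p < mRanges.get(0)[1];
  }

  public static void main(String[] args) {
    CompletedRangesSketch r = new CompletedRangesSketch();
    r.markCompleted(0, 10);
    r.markCompleted(10, 20); // coalesces with [0, 10)
    System.out.println(r.isSynced(15)); // true
  }
}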
+   *
+   * @param fsMaster the file system master
+   * @param inodeStore the inode store
+   * @param mountTable the mount table
+   * @param inodeTree the inode tree
+   * @param syncPathCache the sync path cache
+   * @param absentPathCache the absent path cache
+   */
+  public DefaultSyncProcess(
+      DefaultFileSystemMaster fsMaster, ReadOnlyInodeStore inodeStore,
+      MountTable mountTable, InodeTree inodeTree,
+      UfsSyncPathCache syncPathCache, UfsAbsentPathCache absentPathCache) {
+    mFsMaster = fsMaster;
+    mInodeStore = inodeStore;
+    mMountTable = mountTable;
+    mInodeTree = inodeTree;
+    mTaskTracker = new TaskTracker(
+        Configuration.getInt(PropertyKey.MASTER_METADATA_SYNC_EXECUTOR_POOL_SIZE),
+        Configuration.getInt(PropertyKey.MASTER_METADATA_SYNC_UFS_CONCURRENT_LOADS),
+        Configuration.getBoolean(PropertyKey.MASTER_METADATA_SYNC_UFS_CONCURRENT_GET_STATUS),
+        Configuration.getBoolean(PropertyKey.MASTER_METADATA_SYNC_UFS_CONCURRENT_LISTING),
+        syncPathCache, absentPathCache, this, this::getUfsClient);
+    mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, fsMaster, inodeTree);
+    mUfsAbsentCache = absentPathCache;
+  }
+
+  private static String ufsPathToAlluxioPath(
+      String ufsPath, String ufsMount, String alluxioMount) {
+    // first check if the ufsPath is the ufsMount path
+    if (ufsPath.length() < ufsMount.length()
+        && !ufsPath.endsWith(AlluxioURI.SEPARATOR)) {
+      Preconditions.checkState(ufsMount.equals(ufsPath + AlluxioURI.SEPARATOR));
+      ufsPath = ufsMount;
+    }
+    // the ufs path will be the full path (but will not include the bucket),
+    // e.g. nested/file or /nested/file
+    // ufsMount will include the ufs mount path without the bucket, e.g. /nested/
+    // First remove the ufsMount from ufsPath, including the first / so that
+    // ufsPath does not start with /
+    if (ufsPath.startsWith(AlluxioURI.SEPARATOR)) {
+      ufsPath = ufsPath.substring(ufsMount.length());
+    } else {
+      ufsPath = ufsPath.substring(ufsMount.length() - 1);
+    }
+    // now append the alluxio mount path to the ufs path
+    // the alluxio mount path will be something like /a/b/c
+    return alluxioMount + ufsPath;
+  }
+
+  /**
+   * @param groupId the id of the task group
+   * @return the {@link TaskGroup} corresponding to the id
+   */
+  public Optional<TaskGroup> getTaskGroup(long groupId) {
+    return Optional.ofNullable(mTaskGroupMap.getIfPresent(groupId));
+  }
+
+  /**
+   * Perform a metadata sync on the given path. Launches the task asynchronously.
+   * If descendant type is ALL, then a task is launched for each nested mount.
+ * + * @param alluxioPath the path to sync + * @param descendantType the depth of descendant to load + * @param directoryLoadType the type of listing to do on directories in the UFS + * @param syncInterval the sync interval to check if a sync is needed + * @param startAfter the start after mark where the sync starts + * @param isAsyncMetadataLoading if the sync is initiated by an async load metadata cli command + * @return the running task group + */ + public TaskGroup syncPath( + AlluxioURI alluxioPath, DescendantType descendantType, DirectoryLoadType directoryLoadType, + long syncInterval, @Nullable String startAfter, boolean isAsyncMetadataLoading) + throws InvalidPathException { + startAfter = stripPrefixIfPresent(alluxioPath, startAfter); + if (startAfter != null && descendantType == DescendantType.ALL + && directoryLoadType != DirectoryLoadType.SINGLE_LISTING) { + throw new InvalidPathException( + "StartAfter param does not work with BFS/DFS directory load type"); + } + MountTable.Resolution resolution = mMountTable.resolve(alluxioPath); + Stream tasks = Stream.empty(); + long groupId = mTaskGroupIds.getAndIncrement(); + if (descendantType == DescendantType.ALL) { + List nestedMounts = mMountTable.findChildrenMountPoints(alluxioPath, false); + if (nestedMounts.size() > 0) { + if (startAfter != null) { + throw new InvalidPathException("StartAfter param does not work with nested mount"); + } + } + tasks = nestedMounts.stream().map(mountInfo -> + mTaskTracker.launchTaskAsync(mMetadataSyncHandler, mountInfo.getUfsUri(), + mountInfo.getAlluxioUri(), null, descendantType, + syncInterval, directoryLoadType, !isAsyncMetadataLoading)); + } + AlluxioURI ufsPath = resolution.getUri(); + TaskGroup group = new TaskGroup(groupId, + Stream.concat(Stream.of(mTaskTracker.launchTaskAsync( + mMetadataSyncHandler, ufsPath, alluxioPath, + startAfter, descendantType, syncInterval, directoryLoadType, + !isAsyncMetadataLoading)), tasks) + .toArray(BaseTask[]::new)); + mTaskGroupMap.put(groupId, group); + return group; + } + + /** + * Perform a metadata sync on the given path. Launches the task asynchronously. + * If descendant type is ALL, then a task is launched for each nested mount. 
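// A worked example (not part of this patch) of the ufsPathToAlluxioPath translation
// defined earlier in this file. With s3://bucket/dir mounted at /mnt, the UFS mount path
// with the bucket stripped is "/dir/", so "/dir/nested/file" maps to "/mnt/nested/file".
// This copy keeps only the tail arithmetic and omits the mount-root edge case.
public final class PathTranslationSketch {
  static String ufsPathToAlluxioPath(String ufsPath, String ufsMount, String alluxioMount) {
    // same arithmetic as the method above: drop the mount prefix, keep the remainder
    if (ufsPath.startsWith("/")) {
      ufsPath = ufsPath.substring(ufsMount.length());
    } else {
      ufsPath = ufsPath.substring(ufsMount.length() - 1);
    }
    return alluxioMount + ufsPath;
  }

  public static void main(String[] args) {
    System.out.println(ufsPathToAlluxioPath("/dir/nested/file", "/dir/", "/mnt/"));
    System.out.println(ufsPathToAlluxioPath("dir/nested/file", "/dir/", "/mnt/"));
  }
}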
+
+  /**
+   * Performs a metadata sync on the given path. Launches the task asynchronously.
+   * If the descendant type is ALL, then a task is launched for each nested mount.
+   *
+   * @param alluxioPath the path to sync
+   * @param descendantType the depth of descendants to load
+   * @param directoryLoadType the type of listing to do on directories in the UFS
+   * @param syncInterval the sync interval to check if a sync is needed
+   * @return the running task
+   */
+  public TaskGroup syncPath(
+      AlluxioURI alluxioPath, DescendantType descendantType, DirectoryLoadType directoryLoadType,
+      long syncInterval) throws InvalidPathException {
+    return syncPath(alluxioPath, descendantType, directoryLoadType, syncInterval, null, false);
+  }
+
+  private CloseableResource<UfsClient> getUfsClient(AlluxioURI ufsPath) {
+    CloseableResource<UnderFileSystem> ufsResource =
+        getClient(reverseResolve(ufsPath)).acquireUfsResource();
+    return new CloseableResource<UfsClient>(ufsResource.get()) {
+      @Override
+      public void closeResource() {
+        ufsResource.closeResource();
+      }
+    };
+  }
+
+  private UfsManager.UfsClient getClient(MountTable.ReverseResolution reverseResolution) {
+    UfsManager.UfsClient ufsClient = mMountTable.getUfsClient(
+        reverseResolution.getMountInfo().getMountId());
+    if (ufsClient == null) {
+      throw new NotFoundRuntimeException(String.format("Mount not found for UFS path %s",
+          reverseResolution.getMountInfo().getUfsUri()));
+    }
+    return ufsClient;
+  }
+
+  private MountTable.ReverseResolution reverseResolve(
+      AlluxioURI ufsPath) throws MountPointNotFoundRuntimeException {
+    MountTable.ReverseResolution reverseResolution = mMountTable.reverseResolve(
+        ufsPath);
+    if (reverseResolution == null) {
+      throw new MountPointNotFoundRuntimeException(String.format("Mount not found for UFS path %s",
+          ufsPath));
+    }
+    return reverseResolution;
+  }
+
+  @Override
+  public SyncProcessResult performSync(
+      LoadResult loadResult, UfsSyncPathCache syncPathCache) throws Throwable {
+    try (SyncProcessContext context =
+             SyncProcessContext.Builder.builder(
+                 mFsMaster.createNonMergingJournalRpcContext(
+                     new InternalOperationContext()), loadResult).build()) {
+      MountTable.ReverseResolution reverseResolution
+          = reverseResolve(loadResult.getBaseLoadPath());
+      try (CloseableResource<UnderFileSystem> ufsResource =
+               getClient(reverseResolution).acquireUfsResource()) {
+        UnderFileSystem ufs = ufsResource.get();
+        final MountInfo mountInfo = reverseResolution.getMountInfo();
+
+        // this is the full mount, e.g. s3://bucket/dir
+        AlluxioURI ufsMountURI = reverseResolution.getMountInfo().getUfsUri();
+        // this is the base of the mount, e.g. s3://bucket/
+        AlluxioURI ufsMountBaseUri = new AlluxioURI(ufsMountURI.getRootPath());
+        // and without the s3://bucket, e.g. the above would be /dir/
+        final String ufsMountPath = PathUtils.normalizePath(
+            ufsMountURI.getPath(), AlluxioURI.SEPARATOR);
+        // the loaded and normalized ufs path without the bucket, e.g. /dir/
+        final String baseLoadPath = PathUtils.normalizePath(loadResult.getBaseLoadPath().getPath(),
+            AlluxioURI.SEPARATOR);
+
+        // the mounted path in alluxio, e.g. /mount
+        AlluxioURI alluxioMountUri = reverseResolution.getMountInfo().getAlluxioUri();
+        final String alluxioMountPath = PathUtils.normalizePath(
+            alluxioMountUri.getPath(), AlluxioURI.SEPARATOR);
+        // the Alluxio path that was loaded from the UFS
+        AlluxioURI alluxioSyncPath = reverseResolution.getUri();
+        // the completed path sequence is from the previous load's
+        // last sync path, until our last UFS item
+        AlluxioURI syncStart = new AlluxioURI(ufsPathToAlluxioPath(loadResult.getPreviousLast()
+            .orElse(loadResult.getBaseLoadPath()).getPath(), ufsMountPath, alluxioMountPath));
+        LOG.debug("Syncing from {}, load batch id {}, load id {}", syncStart,
+            loadResult.getLoadRequest().getBatchSetId(),
+            loadResult.getLoadRequest().getLoadRequestId());
+        Stream<UfsItem> stream = loadResult.getUfsLoadResult().getItems().map(status -> {
+          UfsItem item = new UfsItem(status, ufsMountPath, alluxioMountPath);
+          try {
+            // If we are loading by directory, then we must create a new load task on each
+            // directory traversed
+            if (loadResult.getTaskInfo().hasDirLoadTasks() && status.isDirectory()
+                && !item.mAlluxioUri.isAncestorOf(loadResult.getTaskInfo().getAlluxioPath(), false)
+                && !(baseLoadPath.equals(
+                PathUtils.normalizePathStart(status.getName(), AlluxioURI.SEPARATOR)))) {
+              // first check if the directory needs to be synced
+              if (syncPathCache.shouldSyncPath(item.mAlluxioUri,
+                  loadResult.getTaskInfo().getSyncInterval(),
+                  loadResult.getTaskInfo().getDescendantType()).isShouldSync()) {
+                AlluxioURI childDirectoryPath = ufsMountBaseUri.join(status.getName());
+                MountTable.ReverseResolution childDirectoryReverseResolution =
+                    mMountTable.reverseResolve(childDirectoryPath);
+                Preconditions.checkNotNull(childDirectoryReverseResolution);
+                MountTable.Resolution childDirectoryResolution =
+                    mMountTable.resolve(childDirectoryReverseResolution.getUri());
+                if (childDirectoryReverseResolution.getMountInfo().getMountId()
+                    == childDirectoryResolution.getMountId()) {
+                  loadResult.getTaskInfo().getMdSync()
+                      .loadNestedDirectory(loadResult.getTaskInfo().getId(),
+                          ufsMountBaseUri.join(status.getName()));
+                } else {
+                  LOG.warn("Sync of path {} is skipped as the directory is a mount point. "
+                      + "Mount point {}, conflicting mount point {}", reverseResolution.getUri(),
+                      childDirectoryReverseResolution.getMountInfo().getUfsUri(),
+                      childDirectoryResolution.getUfsMountPointUri());
+                }
+              }
+            }
+          } catch (Exception e) {
+            throw new InvalidArgumentRuntimeException(e);
+          }
+          return item;
+        });
+
+        PeekingIterator<UfsItem> ufsIterator = Iterators.peekingIterator(stream.iterator());
+        // Check if the root of the path being synced is a file
+        UfsItem firstItem = ufsIterator.hasNext() ? ufsIterator.peek() : null;
+        boolean baseSyncPathIsFile = firstItem != null && firstItem.mUfsItem.isFile()
+            && PathUtils.normalizePathStart(firstItem.mUfsItem.getName(), AlluxioURI.SEPARATOR)
+            .equals(loadResult.getBaseLoadPath().getPath());
+
+        LOG.debug("Processing sync from {}", firstItem == null ?
"" : firstItem.mAlluxioPath); + // this variable will keep the last UfsStatus returned + UfsItem lastUfsStatus; + ReadOption.Builder readOptionBuilder = ReadOption.newBuilder(); + // we start iterating the Alluxio metadata from the end of the + // previous load batch, or if this is the first load then the base + // load path + AlluxioURI readFrom = new AlluxioURI(ufsPathToAlluxioPath( + loadResult.getPreviousLast().map(AlluxioURI::getPath).orElse( + baseLoadPath), ufsMountPath, alluxioMountPath)); + // we skip the initial inode if this is not the initial listing, as this + // inode was processed in the previous listing + boolean skipInitialReadFrom = loadResult.getPreviousLast().isPresent(); + Preconditions.checkState(readFrom.getPath().startsWith(alluxioMountUri.getPath())); + loadResult.getPreviousLast().ifPresent(prevLast -> { + String prevLastAlluxio = ufsPathToAlluxioPath( + prevLast.getPath(), ufsMountPath, alluxioMountPath); + String readFromSubstring = prevLastAlluxio.substring( + alluxioSyncPath.getPath().endsWith(AlluxioURI.SEPARATOR) + ? alluxioSyncPath.getPath().length() : alluxioSyncPath.getPath().length() + 1); + readOptionBuilder.setReadFrom(readFromSubstring); + }); + // We stop iterating the Alluxio metadata at the last loaded item if the load result + // is truncated + AlluxioURI readUntil = null; + if (loadResult.getUfsLoadResult().isTruncated() + && loadResult.getUfsLoadResult().getLastItem().isPresent()) { + readUntil = new AlluxioURI(ufsPathToAlluxioPath( + loadResult.getUfsLoadResult().getLastItem().get().getPath(), + ufsMountPath, alluxioMountPath)); + } + + // Take the root of the sync path as a write_edge (unless it is the mount path + // as in this case we will not modify the node), once we traverse + // past this node, we will downgrade it to a read lock in + // SyncProcessState.getNextInode + InodeTree.LockPattern rootLockPattern = alluxioSyncPath.equals(alluxioMountUri) + ? 
InodeTree.LockPattern.READ : InodeTree.LockPattern.WRITE_EDGE; + LockingScheme lockingScheme = new LockingScheme(alluxioSyncPath, + rootLockPattern, false); + try (LockedInodePath lockedInodePath = + mInodeTree.lockInodePath( + lockingScheme, context.getRpcContext().getJournalContext())) { + // after taking the lock on the root path, + // we must verify the mount is still valid + String ufsMountUriString = PathUtils.normalizePath(ufsMountPath, AlluxioURI.SEPARATOR); + String ufsMountUriStringAfterTakingLock = + PathUtils.normalizePath(mMountTable.resolve(alluxioSyncPath) + .getUfsMountPointUri().getPath(), AlluxioURI.SEPARATOR); + if (!ufsMountUriString.equals(ufsMountUriStringAfterTakingLock)) { + NotFoundRuntimeException ex = new NotFoundRuntimeException(String.format( + "Mount path %s no longer exists during sync of %s", + ufsMountURI, alluxioSyncPath)); + handleConcurrentModification(context, alluxioSyncPath.getPath(), true, ex); + throw ex; + } + boolean containsNestedMount = context.getDescendantType() != DescendantType.NONE + && mMountTable.findChildrenMountPoints(alluxioSyncPath, false).size() > 0; + // Get the inode of the sync start + try (SkippableInodeIterator inodeIterator = mInodeStore.getSkippableChildrenIterator( + readOptionBuilder.build(), context.getDescendantType(), loadResult.isFirstLoad(), + lockedInodePath)) { + SyncProcessState syncState = new SyncProcessState(alluxioMountPath, + alluxioSyncPath, lockedInodePath, loadResult.isFirstLoad(), + readFrom, skipInitialReadFrom, readUntil, + context, inodeIterator, ufsIterator, mountInfo, ufs, containsNestedMount); + lastUfsStatus = updateMetadataSync(syncState); + } + if (lockedInodePath.fullPathExists() && lockedInodePath.getInode().isDirectory() + && !lockedInodePath.getInode().asDirectory().isDirectChildrenLoaded()) { + // check if the root sync path should have its children marked as loaded + context.addDirectoriesToUpdateIsChildrenLoaded(lockedInodePath.getUri()); + } + } + context.updateAbsentCache(mUfsAbsentCache); + AlluxioURI syncEnd = lastUfsStatus == null ? 
syncStart
+          : lastUfsStatus.mAlluxioUri;
+      PathSequence pathSequence = new PathSequence(syncStart, syncEnd);
+      LOG.debug("Completed processing sync from {} until {}", syncStart, syncEnd);
+      return new SyncProcessResult(loadResult.getTaskInfo(), loadResult.getBaseLoadPath(),
+          pathSequence, loadResult.getUfsLoadResult().isTruncated(),
+          baseSyncPathIsFile);
+    }
+  }
+
+  private UfsItem updateMetadataSync(SyncProcessState syncState)
+      throws IOException, FileDoesNotExistException, FileAlreadyExistsException, BlockInfoException,
+      AccessControlException, DirectoryNotEmptyException, InvalidPathException {
+    InodeIterationResult currentInode = syncState.getNextInode();
+    if (currentInode != null && currentInode.getLockedPath().getUri().equals(
+        syncState.mMountInfo.getAlluxioUri())) {
+      // skip the inode of the mount path
+      currentInode = syncState.getNextInode();
+    }
+    // We don't want to include the inode that we are reading from, so we skip
+    // until we are sure we are past it
+    while (syncState.mUfsStatusIterator.hasNext() && currentInode != null
+        && ((syncState.mSkipInitialReadFrom
+        && syncState.mReadFrom.compareTo(currentInode.getLockedPath().getUri()) >= 0)
+        || (!syncState.mSkipInitialReadFrom
+        && syncState.mReadFrom.compareTo(currentInode.getLockedPath().getUri()) > 0))) {
+      currentInode = syncState.getNextInode();
+    }
+    UfsItem currentUfsStatus = IteratorUtils.nextOrNull(
+        syncState.mUfsStatusIterator);
+    // skip the initial mount path of the UFS status,
+    // as well as the base sync path if this is not our first load task
+    if (currentUfsStatus != null
+        && (currentUfsStatus.mAlluxioPath.equals(syncState.mAlluxioMountPath)
+        || (!syncState.mIsFirstLoad
+        && currentUfsStatus.mAlluxioUri.equals(syncState.mAlluxioSyncPath)))) {
+      currentUfsStatus = IteratorUtils.nextOrNull(
+          syncState.mUfsStatusIterator);
+    }
+    UfsItem lastUfsStatus = currentUfsStatus;
+
+    // Case A. Alluxio /foo and UFS /bar
+    // 1. WRITE_LOCK lock /bar
+    // 2. create /bar
+    // 3. unlock /bar
+    // 4. move UFS pointer
+    // Case B. Alluxio /bar and UFS /foo
+    // 1. WRITE_LOCK lock /bar
+    // 2. delete /bar RECURSIVELY (call fs master)
+    // 3. unlock /bar
+    // 4. move Alluxio pointer and SKIP the children of /foo
+    // Case C. Alluxio /foo and UFS /foo
+    // 1. compare the fingerprint
+    // 2. WRITE_LOCK /foo
+    // 3. update the metadata
+    // 4. unlock /foo
+    // 5. move two pointers
+    while (currentInode != null || currentUfsStatus != null) {
+      SingleInodeSyncResult result = performSyncOne(syncState, currentUfsStatus, currentInode);
+      if (result.mSkipChildren) {
+        syncState.mInodeIterator.skipChildrenOfTheCurrent();
+      }
+      if (result.mMoveInode) {
+        currentInode = syncState.getNextInode();
+      }
+      if (result.mMoveUfs) {
+        currentUfsStatus = IteratorUtils.nextOrNull(syncState.mUfsStatusIterator);
+        lastUfsStatus = currentUfsStatus == null ? lastUfsStatus : currentUfsStatus;
+      }
+    }
+    Preconditions.checkState(!syncState.mUfsStatusIterator.hasNext());
+    return lastUfsStatus;
+  }
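The method above is an ordered merge of two sorted sequences, the Alluxio inode children and the UFS listing. A self-contained toy version of the same two-pointer walk, illustrative only, with all names assumed:

```java
// Both inputs must be sorted by path, so one linear pass classifies every
// entry as create (UFS only), delete (Alluxio only), or sync (both sides).
import java.util.Arrays;
import java.util.Iterator;

public class MergeWalkExample {
  public static void main(String[] args) {
    Iterator<String> inodes = Arrays.asList("/a", "/c").iterator();
    Iterator<String> ufs = Arrays.asList("/a", "/b").iterator();
    String i = next(inodes);
    String u = next(ufs);
    while (i != null || u != null) {
      int cmp = i == null ? 1 : u == null ? -1 : i.compareTo(u);
      if (cmp > 0) {        // UFS-only entry: create it in Alluxio
        System.out.println("CREATE " + u);
        u = next(ufs);
      } else if (cmp < 0) { // Alluxio-only entry: delete it
        System.out.println("DELETE " + i);
        i = next(inodes);
      } else {              // present on both sides: compare fingerprints
        System.out.println("SYNC   " + i);
        i = next(inodes);
        u = next(ufs);
      }
    }
    // prints: SYNC /a, CREATE /b, DELETE /c
  }

  private static <T> T next(Iterator<T> it) {
    return it.hasNext() ? it.next() : null;
  }
}
```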
+
+  private void checkShouldSetDescendantsLoaded(Inode inode, SyncProcessState syncState)
+      throws FileDoesNotExistException, InvalidPathException {
+    // Mark directories as having their children loaded based on the sync descendant type
+    if (syncState.mContext.getDescendantType() != DescendantType.NONE) {
+      if (inode.isDirectory() && !inode.asDirectory().isDirectChildrenLoaded()) {
+        AlluxioURI inodePath = mInodeTree.getPath(inode.getId());
+        // The children have been loaded if
+        // (1) The descendant type is ALL and the inode is contained in the sync path
+        // (2) The descendant type is ONE and the inode is the synced path
+        if ((syncState.mContext.getDescendantType() == DescendantType.ALL
+            && syncState.mAlluxioSyncPath.isAncestorOf(inodePath, false))
+            || (syncState.mContext.getDescendantType() == DescendantType.ONE
+            && syncState.mAlluxioSyncPath.equals(inodePath))) {
+          syncState.mContext.addDirectoriesToUpdateIsChildrenLoaded(inodePath);
+        }
+      }
+    }
+  }
+
+  protected SingleInodeSyncResult performSyncOne(
+      SyncProcessState syncState,
+      @Nullable UfsItem currentUfsStatus,
+      @Nullable InodeIterationResult currentInode)
+      throws InvalidPathException, FileDoesNotExistException, FileAlreadyExistsException,
+      IOException, BlockInfoException, DirectoryNotEmptyException, AccessControlException {
+    Optional<Integer> comparisonResult = currentInode != null && currentUfsStatus != null
+        ? Optional.of(
+        currentInode.getLockedPath().getUri().compareTo(currentUfsStatus.mAlluxioUri)) :
+        Optional.empty();
+    if (currentInode == null || (comparisonResult.isPresent() && comparisonResult.get() > 0)) {
+      // (Case 1) - in this case the UFS item is missing in the inode tree, so we create it
+      // comparisonResult being present implies that currentUfsStatus is not null
+      assert currentUfsStatus != null;
+      try (LockedInodePath lockedInodePath = syncState.mAlluxioSyncPathLocked.lockDescendant(
+          currentUfsStatus.mAlluxioUri, InodeTree.LockPattern.WRITE_EDGE)) {
+        // If the current mount point contains a nested mount point, we need an
+        // extra check to avoid creating files that are shadowed by mount points.
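+        // (Reviewer note, illustrative) Shadowing example: with /mnt mounted to
+        // s3://bucket/ and /mnt/nested mounted to hdfs://ns/, an object
+        // s3://bucket/nested/f translates to /mnt/nested/f, which resolves to
+        // the nested mount rather than this one, so it must be skipped below.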
+        if (syncState.mContainsNestedMount) {
+          if (mMountTable.resolve(lockedInodePath.getUri()).getMountId()
+              != syncState.mMountInfo.getMountId()) {
+            // The file to create is shadowed by a nested mount
+            syncState.mContext.reportSyncOperationSuccess(SyncOperation.SKIPPED_ON_MOUNT_POINT);
+            return new SingleInodeSyncResult(true, false, false);
+          }
+        }
+        List<Inode> createdInodes;
+        if (currentUfsStatus.mUfsItem.isDirectory()) {
+          createdInodes = createInodeDirectoryMetadata(syncState.mContext, lockedInodePath,
+              currentUfsStatus.mUfsItem, syncState);
+        } else {
+          createdInodes = createInodeFileMetadata(syncState.mContext, lockedInodePath,
+              currentUfsStatus.mUfsItem, syncState);
+        }
+        if (syncState.mContext.getDescendantType() != DescendantType.NONE) {
+          // Mark directories as having their children loaded based on the sync descendant type
+          for (Inode next : createdInodes) {
+            checkShouldSetDescendantsLoaded(next, syncState);
+          }
+        }
+        syncState.mContext.reportSyncOperationSuccess(SyncOperation.CREATE, createdInodes.size());
+      } catch (FileAlreadyExistsException e) {
+        handleConcurrentModification(
+            syncState.mContext, currentUfsStatus.mAlluxioPath, false, e);
+      }
+      return new SingleInodeSyncResult(true, false, false);
+    } else if (currentUfsStatus == null || comparisonResult.get() < 0) {
+      if (currentInode.getInode().isDirectory() && currentUfsStatus != null
+          && currentInode.getLockedPath().getUri().isAncestorOf(
+          currentUfsStatus.mAlluxioUri, false)) {
+        // (Case 2) - in this case the inode is a directory and is an ancestor of the current
+        // UFS state, so we skip it
+        checkShouldSetDescendantsLoaded(currentInode.getInode(), syncState);
+        return new SingleInodeSyncResult(false, true, false);
+      }
+      // (Case 3) - in this case the inode is not in the UFS, so we must delete it
+      // unless the file is being persisted, or is not complete
+      try {
+        LockedInodePath path = currentInode.getLockedPath();
+        path.traverse();
+        AlluxioURI uri = currentInode.getLockedPath().getUri();
+        // skip if this is a mount point, or it belongs to a nested mount point
+        if (mMountTable.isMountPoint(uri)
+            || (syncState.mContainsNestedMount && mMountTable.resolve(uri).getMountId()
+            != syncState.mMountInfo.getMountId())) {
+          // the mount point will be synced through another sync task if
+          // descendant type is ALL.
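+          // (Reviewer note) The flags below are (moveUfs, moveInode, skipChildren):
+          // (false, true, true) keeps the UFS pointer in place, advances past
+          // this inode, and skips its entire subtree.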
+          return new SingleInodeSyncResult(false, true, true);
+        }
+        Pair<Integer, Integer> deletedInodes = deletePath(syncState.mContext, path, true);
+        if (deletedInodes.getFirst() > 0) {
+          syncState.mContext.reportSyncOperationSuccess(SyncOperation.DELETE,
+              deletedInodes.getFirst());
+        }
+        if (deletedInodes.getSecond() > 0) {
+          syncState.mContext.reportSyncOperationSuccess(SyncOperation.SKIPPED_NON_PERSISTED,
+              deletedInodes.getSecond());
+        }
+      } catch (FileDoesNotExistException e) {
+        handleConcurrentModification(
+            syncState.mContext, currentInode.getLockedPath().getUri().getPath(), false, e);
+      }
+      return new SingleInodeSyncResult(false, true, true);
+    }
+    // (Case 4) - in this case both the inode and the UFS item exist, so we check if we need
+    // to update the metadata
+    LockedInodePath lockedInodePath = currentInode.getLockedPath();
+    lockedInodePath.traverse();
+    // skip if this is a mount point
+    if (mMountTable.isMountPoint(currentInode.getLockedPath().getUri())) {
+      syncState.mContext.reportSyncOperationSuccess(SyncOperation.SKIPPED_ON_MOUNT_POINT, 1);
+      return new SingleInodeSyncResult(true, true, true);
+    }
+    // skip if the file is not complete or not persisted
+    if (lockedInodePath.getInode().isFile()) {
+      InodeFile inodeFile = lockedInodePath.getInodeFile();
+      if (!inodeFile.isCompleted() || !inodeFile.isPersisted()) {
+        syncState.mContext.reportSyncOperationSuccess(SyncOperation.SKIPPED_NON_PERSISTED, 1);
+        return new SingleInodeSyncResult(true, true, false);
+      }
+    }
+    // HDFS also fetches the ACL list, which is ignored for now
+    String ufsType = syncState.mUfs.getUnderFSType();
+    Fingerprint ufsFingerprint = Fingerprint.create(ufsType, currentUfsStatus.mUfsItem);
+    boolean containsMountPoint = mMountTable.containsMountPoint(
+        currentInode.getLockedPath().getUri(), true, false);
+    UfsSyncUtils.SyncPlan syncPlan =
+        UfsSyncUtils.computeSyncPlan(currentInode.getInode(), ufsFingerprint, containsMountPoint);
+    if (syncPlan.toUpdateMetaData() || syncPlan.toDelete() || syncPlan.toLoadMetadata()) {
+      try {
+        if (syncPlan.toUpdateMetaData()) {
+          updateInodeMetadata(syncState.mContext, lockedInodePath, currentUfsStatus.mUfsItem,
+              ufsFingerprint);
+          syncState.mContext.reportSyncOperationSuccess(SyncOperation.UPDATE);
+        } else if (syncPlan.toDelete() && syncPlan.toLoadMetadata()) {
+          if (lockedInodePath.getInode().isDirectory()) {
+            throw new InternalRuntimeException(
+                String.format("Deleting directory %s in metadata sync due to metadata change",
+                    lockedInodePath.getUri()));
+          }
+          deletePath(syncState.mContext, lockedInodePath, false);
+          lockedInodePath.removeLastInode();
+          try (LockedInodePath newLockedInodePath = mInodeTree.lockInodePath(
+              lockedInodePath.getUri(), InodeTree.LockPattern.WRITE_EDGE,
+              syncState.mContext.getMetadataSyncJournalContext())) {
+            if (currentUfsStatus.mUfsItem.isDirectory()) {
+              createInodeDirectoryMetadata(syncState.mContext, newLockedInodePath,
+                  currentUfsStatus.mUfsItem, syncState);
+            } else {
+              createInodeFileMetadata(syncState.mContext, newLockedInodePath,
+                  currentUfsStatus.mUfsItem, syncState);
+            }
+          }
+          syncState.mContext.reportSyncOperationSuccess(SyncOperation.RECREATE);
+        } else {
+          throw new IllegalStateException("We should never reach here.");
+        }
+      } catch (FileDoesNotExistException | FileAlreadyExistsException e) {
+        handleConcurrentModification(
+            syncState.mContext, currentInode.getLockedPath().getUri().getPath(), false, e);
+      }
+    } else {
+      syncState.mContext.reportSyncOperationSuccess(SyncOperation.NOOP);
+    }
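+    // (Reviewer note) Case 4 always advances both pointers afterwards: the
+    // matching inode/UFS pair has been reconciled (UPDATE, RECREATE, or NOOP),
+    // so the merge walk moves on with SingleInodeSyncResult(true, true, false).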
+    checkShouldSetDescendantsLoaded(currentInode.getInode(), syncState);
+    return new SingleInodeSyncResult(true, true, false);
+  }
+
+  private void handleConcurrentModification(
+      SyncProcessContext context, String path, boolean isRoot, Exception e)
+      throws FileAlreadyExistsException, FileDoesNotExistException {
+    String loggingMessage = "Sync metadata failed on [{}] due to concurrent modification.";
+    if (!isRoot && context.isConcurrentModificationAllowed()) {
+      context.reportSyncOperationSuccess(SyncOperation.SKIPPED_DUE_TO_CONCURRENT_MODIFICATION);
+      LOG.info(loggingMessage, path, e);
+    } else {
+      context.reportSyncFailReason(SyncFailReason.PROCESSING_CONCURRENT_UPDATE_DURING_SYNC, e);
+      LOG.error(loggingMessage, path, e);
+      if (e instanceof FileAlreadyExistsException) {
+        throw (FileAlreadyExistsException) e;
+      }
+      if (e instanceof FileDoesNotExistException) {
+        throw (FileDoesNotExistException) e;
+      }
+      throw new RuntimeException(e);
+    }
+  }
+
+  private Pair<Integer, Integer> deletePath(
+      SyncProcessContext context, LockedInodePath lockedInodePath, boolean skipNonPersisted)
+      throws FileDoesNotExistException, DirectoryNotEmptyException, IOException,
+      InvalidPathException {
+    DeleteContext syncDeleteContext = DeleteContext.mergeFrom(
+        DeletePOptions.newBuilder()
+            .setRecursive(true)
+            .setAlluxioOnly(true)
+            .setUnchecked(true))
+        .skipNotPersisted(skipNonPersisted)
+        .setMetadataLoad(true);
+    Pair<Integer, Integer> deletedInodes = mFsMaster.deleteInternal(context.getRpcContext(),
+        lockedInodePath, syncDeleteContext, true);
+    if (deletedInodes.getFirst() == 0 && deletedInodes.getSecond() == 0) {
+      throw new FileDoesNotExistException(lockedInodePath + " does not exist.");
+    }
+    return deletedInodes;
+  }
+
+  private void updateInodeMetadata(
+      SyncProcessContext context, LockedInodePath lockedInodePath,
+      UfsStatus ufsStatus, Fingerprint fingerprint)
+      throws FileDoesNotExistException, AccessControlException, InvalidPathException {
+    // UpdateMetadata is used when a file or a directory only had a metadata change.
+    // It works by calling SetAttributeInternal on the inodePath.
+    short mode = ufsStatus.getMode();
+    SetAttributePOptions.Builder builder = SetAttributePOptions.newBuilder()
+        .setMode(new Mode(mode).toProto());
+    if (!ufsStatus.getOwner().equals("")) {
+      builder.setOwner(ufsStatus.getOwner());
+    }
+    if (!ufsStatus.getGroup().equals("")) {
+      builder.setGroup(ufsStatus.getGroup());
+    }
+    SetAttributeContext ctx = SetAttributeContext.mergeFrom(builder)
+        .setUfsFingerprint(fingerprint.serialize())
+        .setMetadataLoad(true);
+    mFsMaster.setAttributeSingleFile(context.getRpcContext(), lockedInodePath, false,
+        CommonUtils.getCurrentMs(), ctx);
+  }
+
+  private List<Inode> createInodeFileMetadata(
+      SyncProcessContext context, LockedInodePath lockedInodePath,
+      UfsStatus ufsStatus, SyncProcessState syncState
+  ) throws InvalidPathException, FileDoesNotExistException, FileAlreadyExistsException,
+      BlockInfoException, IOException {
+    long blockSize = ((UfsFileStatus) ufsStatus).getBlockSize();
+    if (blockSize == UfsFileStatus.UNKNOWN_BLOCK_SIZE) {
+      throw new RuntimeException("Unknown block size");
+    }
+
+    // Metadata loaded from the UFS has no TTL set.
+    CreateFileContext createFileContext = CreateFileContext.mergeFromDefault(mCreateFilePOptions);
+    createFileContext.getOptions().setBlockSizeBytes(blockSize);
+    // Ancestors should have been created earlier, unless this is the sync root
+    createFileContext.getOptions().setRecursive(true);
+    FileSystemMasterCommonPOptions commonPOptions =
+        mIgnoreTTL ?
NO_TTL_OPTION : context.getCommonOptions(); + createFileContext.getOptions() + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() + .setTtl(commonPOptions.getTtl()) + .setTtlAction(commonPOptions.getTtlAction())); + Fingerprint fingerprint = Fingerprint.create(syncState.mUfs.getUnderFSType(), ufsStatus); + createFileContext.setMissingDirFingerprint(() -> + Fingerprint.create(syncState.mUfs.getUnderFSType(), + new UfsDirectoryStatus( + ufsStatus.getName(), ufsStatus.getOwner(), + ufsStatus.getGroup(), ufsStatus.getMode())).serialize()); + createFileContext.setFingerprint(fingerprint.serialize()); + + createFileContext.setWriteType(WriteType.THROUGH); // set as through since already in UFS + createFileContext.setMetadataLoad(true, false); + createFileContext.setOwner(ufsStatus.getOwner()); + createFileContext.setGroup(ufsStatus.getGroup()); + createFileContext.setXAttr(ufsStatus.getXAttr()); + short ufsMode = ufsStatus.getMode(); + Mode mode = new Mode(ufsMode); + Long ufsLastModified = ufsStatus.getLastModifiedTime(); + if (syncState.mMountInfo.getOptions().getShared()) { + mode.setOtherBits(mode.getOtherBits().or(mode.getOwnerBits())); + } + createFileContext.getOptions().setMode(mode.toProto()); + // NO ACL for now + if (ufsLastModified != null) { + createFileContext.setOperationTimeMs(ufsLastModified); + } + List result = mFsMaster.createCompleteFileInternalForMetadataSync( + context.getRpcContext(), lockedInodePath, createFileContext, (UfsFileStatus) ufsStatus); + context.addDirectoriesToUpdateAbsentCache(lockedInodePath.getUri().getParent()); + return result; + } + + private List createInodeDirectoryMetadata( + SyncProcessContext context, LockedInodePath lockedInodePath, + UfsStatus ufsStatus, SyncProcessState syncState + ) throws InvalidPathException, FileDoesNotExistException, FileAlreadyExistsException, + IOException { + MountTable.Resolution resolution = mMountTable.resolve(lockedInodePath.getUri()); + boolean isMountPoint = mMountTable.isMountPoint(lockedInodePath.getUri()); + + CreateDirectoryContext createDirectoryContext = CreateDirectoryContext.defaults(); + createDirectoryContext.getOptions() + .setRecursive(true) + .setAllowExists(false) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() + .setTtl(context.getCommonOptions().getTtl()) + .setTtlAction(context.getCommonOptions().getTtlAction())); + createDirectoryContext.setMountPoint(isMountPoint); + createDirectoryContext.setMetadataLoad(true, false); + createDirectoryContext.setWriteType(WriteType.THROUGH); + String dirFingerprint = Fingerprint.create( + syncState.mUfs.getUnderFSType(), ufsStatus).serialize(); + createDirectoryContext.setMissingDirFingerprint(() -> dirFingerprint); + createDirectoryContext.setFingerprint(dirFingerprint); + + String ufsOwner = ufsStatus.getOwner(); + String ufsGroup = ufsStatus.getGroup(); + short ufsMode = ufsStatus.getMode(); + Long lastModifiedTime = ufsStatus.getLastModifiedTime(); + Mode mode = new Mode(ufsMode); + if (resolution.getShared()) { + mode.setOtherBits(mode.getOtherBits().or(mode.getOwnerBits())); + } + createDirectoryContext.getOptions().setMode(mode.toProto()); + createDirectoryContext + .setOwner(ufsOwner) + .setGroup(ufsGroup) + .setUfsStatus(ufsStatus); + createDirectoryContext.setXAttr(ufsStatus.getXAttr()); + + if (lastModifiedTime != null) { + createDirectoryContext.setOperationTimeMs(lastModifiedTime); + } + return mFsMaster.createDirectoryInternal( + context.getRpcContext(), + lockedInodePath, + resolution.getUfsClient(), + 
resolution.getUri(), + createDirectoryContext + ); + } + + /** + * @return the task tracker + */ + public TaskTracker getTaskTracker() { + return mTaskTracker; + } + + static final class UfsItem { + final UfsStatus mUfsItem; + final String mAlluxioPath; + final AlluxioURI mAlluxioUri; + + UfsItem(UfsStatus ufsStatus, String ufsMount, String alluxioMount) { + mAlluxioPath = ufsPathToAlluxioPath(ufsStatus.getName(), ufsMount, alluxioMount); + mAlluxioUri = new AlluxioURI(mAlluxioPath); + mUfsItem = ufsStatus; + } + } + + @VisibleForTesting + static final class SyncProcessState { + final String mAlluxioMountPath; + final AlluxioURI mAlluxioSyncPath; + final LockedInodePath mAlluxioSyncPathLocked; + final AlluxioURI mReadFrom; + final boolean mSkipInitialReadFrom; + final AlluxioURI mReadUntil; + final SyncProcessContext mContext; + final SkippableInodeIterator mInodeIterator; + final Iterator mUfsStatusIterator; + final MountInfo mMountInfo; + final UnderFileSystem mUfs; + final boolean mIsFirstLoad; + final boolean mContainsNestedMount; + boolean mTraversedRootPath = false; + boolean mDowngradedRootPath = false; + + SyncProcessState( + String alluxioMountPath, + AlluxioURI alluxioSyncPath, + LockedInodePath alluxioSyncPathLocked, + boolean isFirstLoad, + AlluxioURI readFrom, boolean skipInitialReadFrom, + @Nullable AlluxioURI readUntil, + SyncProcessContext context, + SkippableInodeIterator inodeIterator, + Iterator ufsStatusIterator, + MountInfo mountInfo, UnderFileSystem underFileSystem, + boolean containsNestedMount) { + mAlluxioMountPath = alluxioMountPath; + mAlluxioSyncPath = alluxioSyncPath; + mAlluxioSyncPathLocked = alluxioSyncPathLocked; + mIsFirstLoad = isFirstLoad; + mReadFrom = readFrom; + mSkipInitialReadFrom = skipInitialReadFrom; + mReadUntil = readUntil; + mContext = context; + mInodeIterator = inodeIterator; + mUfsStatusIterator = ufsStatusIterator; + mMountInfo = mountInfo; + mUfs = underFileSystem; + mContainsNestedMount = containsNestedMount; + } + + private void downgradeRootPath() { + // once we have traversed the root sync path we downgrade it to a read lock + mAlluxioSyncPathLocked.downgradeToRead(); + mDowngradedRootPath = true; + } + + @Nullable + InodeIterationResult getNextInode() throws InvalidPathException { + if (mTraversedRootPath && !mDowngradedRootPath) { + downgradeRootPath(); + } + mTraversedRootPath = true; + InodeIterationResult next = IteratorUtils.nextOrNull(mInodeIterator); + if (next != null) { + if (!mAlluxioSyncPath.isAncestorOf(next.getLockedPath().getUri(), false)) { + downgradeRootPath(); + return null; + } + if (mReadUntil != null) { + if (next.getLockedPath().getUri().compareTo(mReadUntil) > 0) { + downgradeRootPath(); + return null; + } + } + } + return next; + } + } + + protected static class SingleInodeSyncResult { + boolean mMoveUfs; + boolean mMoveInode; + boolean mSkipChildren; + + public SingleInodeSyncResult(boolean moveUfs, boolean moveInode, boolean skipChildren) { + mMoveUfs = moveUfs; + mMoveInode = moveInode; + mSkipChildren = skipChildren; + } + } + + private String stripPrefixIfPresent(AlluxioURI syncRoot, @Nullable String startAfter) + throws InvalidPathException { + if (startAfter == null || !startAfter.startsWith(AlluxioURI.SEPARATOR)) { + return startAfter; + } + // this path starts from the root, so we must remove the prefix + String startAfterCheck = startAfter.substring(0, + Math.min(syncRoot.getPath().length(), startAfter.length())); + if (!syncRoot.getPath().startsWith(startAfterCheck)) { + throw new 
InvalidPathException(
+          ExceptionMessage.START_AFTER_DOES_NOT_MATCH_PATH
+              .getMessage(startAfter, syncRoot.getPath()));
+    }
+    startAfter = startAfter.substring(
+        Math.min(startAfter.length(), syncRoot.getPath().length()));
+    if (startAfter.startsWith(AlluxioURI.SEPARATOR)) {
+      startAfter = startAfter.substring(1);
+    }
+    if (startAfter.equals("")) {
+      startAfter = null;
+    }
+    return startAfter;
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/DirectoryPathWaiter.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/DirectoryPathWaiter.java
new file mode 100644
index 000000000000..6ca03a5c4540
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/DirectoryPathWaiter.java
@@ -0,0 +1,71 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+import alluxio.AlluxioURI;
+import alluxio.conf.path.TrieNode;
+import alluxio.resource.CloseableResource;
+import alluxio.underfs.UfsClient;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.function.Function;
+
+class DirectoryPathWaiter extends BaseTask {
+  private static final Logger LOG = LoggerFactory.getLogger(DirectoryPathWaiter.class);
+
+  private final TrieNode<AlluxioURI> mCompletedDirs = new TrieNode<>();
+
+  DirectoryPathWaiter(
+      TaskInfo info, long startTime,
+      Function<AlluxioURI, CloseableResource<UfsClient>> clientSupplier,
+      boolean removeOnComplete) {
+    super(info, startTime, clientSupplier, removeOnComplete);
+  }
+
+  @Override
+  public synchronized boolean waitForSync(AlluxioURI path) {
+    while (true) {
+      if (mIsCompleted != null) {
+        return !mIsCompleted.getThrowable().isPresent();
+      }
+      boolean completed = mCompletedDirs.getClosestTerminal(path.getPath())
+          .map(result -> {
+            if (result.getValue().equals(path)) {
+              return true;
+            }
+            AlluxioURI parent = path.getParent();
+            return parent != null && parent.equals(result.getValue());
+          }).orElse(false);
+      if (completed) {
+        return true;
+      }
+      try {
+        wait();
+      } catch (InterruptedException e) {
+        LOG.debug("Interrupted while waiting for synced path {}", path);
+        return false;
+      }
+    }
+  }
+
+  @Override
+  public synchronized void nextCompleted(SyncProcessResult completed) {
+    if (!completed.isTruncated()) {
+      LOG.debug("Completed load of path {}", completed.getBaseLoadPath());
+      mCompletedDirs.insert(completed.getBaseLoadPath().getPath())
+          .setValue(completed.getBaseLoadPath());
+      notifyAll();
+    }
+  }
+}
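Reviewer note between files: DirectoryPathWaiter lets a caller block until a directory (or its parent) has finished loading, waking all waiters whenever a non-truncated batch completes. A greatly simplified monitor sketch of the same pattern, with all names assumed:

```java
// Toy sketch only; the real class tracks completion in a TrieNode keyed by path.
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

public class PathWaiterExample {
  private final Set<String> mDone = ConcurrentHashMap.newKeySet();

  public synchronized boolean waitForSync(String path) throws InterruptedException {
    // A path counts as synced once it, or its parent directory, is complete.
    String parent = path.substring(0, Math.max(1, path.lastIndexOf('/')));
    while (!mDone.contains(path) && !mDone.contains(parent)) {
      wait(); // woken by nextCompleted()
    }
    return true;
  }

  public synchronized void nextCompleted(String dir) {
    mDone.add(dir);
    notifyAll(); // wake every waiter so it can re-check its own path
  }
}
```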
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequest.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequest.java
new file mode 100644
index 000000000000..3d592ebe7f0b
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequest.java
@@ -0,0 +1,157 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+import alluxio.AlluxioURI;
+import alluxio.file.options.DescendantType;
+import alluxio.retry.CountingRetry;
+import alluxio.retry.RetryPolicy;
+
+import java.util.Optional;
+import javax.annotation.Nullable;
+
+/**
+ * This is a request for a single batch load sent to the UFS.
+ */
+public class LoadRequest implements Comparable<LoadRequest> {
+  private final TaskInfo mTaskInfo;
+  private final AlluxioURI mPath;
+  private final String mContinuationToken;
+  private final DescendantType mDescendantType;
+  private final long mId;
+  private final AlluxioURI mPreviousLoadLast;
+  private final boolean mIsFirstLoad;
+  /**
+   * This is the id of the load request that started a set of batches of load requests, i.e.
+   * the batches of loads until one is not truncated.
+   */
+  private final long mBatchSetId;
+  private final RetryPolicy mRetryPolicy = new CountingRetry(2);
+
+  LoadRequest(
+      long id, long batchSetId, TaskInfo taskInfo, AlluxioURI path,
+      @Nullable String continuationToken,
+      @Nullable AlluxioURI previousLoadLast,
+      DescendantType descendantType,
+      boolean isFirstLoad) {
+    taskInfo.getStats().gotLoadRequest();
+    mTaskInfo = taskInfo;
+    mPath = path;
+    mId = id;
+    mBatchSetId = batchSetId;
+    mContinuationToken = continuationToken;
+    mDescendantType = descendantType;
+    mPreviousLoadLast = previousLoadLast;
+    mIsFirstLoad = isFirstLoad;
+  }
+
+  Optional<AlluxioURI> getPreviousLoadLast() {
+    return Optional.ofNullable(mPreviousLoadLast);
+  }
+
+  /**
+   * @return the batch set ID, i.e. the load ID of the directory that initiated this load
+   * if using {@link alluxio.file.options.DirectoryLoadType#BFS} or
+   * {@link alluxio.file.options.DirectoryLoadType#DFS}
+   */
+  long getBatchSetId() {
+    return mBatchSetId;
+  }
+
+  boolean attempt() {
+    return mRetryPolicy.attempt();
+  }
+
+  /**
+   * @return the task info
+   */
+  TaskInfo getTaskInfo() {
+    return mTaskInfo;
+  }
+
+  /**
+   * @return whether this is the first load request of its sync task
+   */
+  boolean isFirstLoad() {
+    return mIsFirstLoad;
+  }
+
+  AlluxioURI getLoadPath() {
+    return mPath;
+  }
+
+  /**
+   * @return the descendant type for this specific load request. Note
+   * that this may be different from the descendant type of the overall
+   * sync operation. For example, if the {@link alluxio.file.options.DirectoryLoadType}
+   * is BFS or DFS and the overall descendant type is ALL, then the
+   * descendant type of each of the load requests will be ONE.
+ */ + DescendantType getDescendantType() { + return mDescendantType; + } + + long getBaseTaskId() { + return mTaskInfo.getId(); + } + + /** + * @return the unique id for this specific load request + */ + long getLoadRequestId() { + return mId; + } + + @Nullable + String getContinuationToken() { + return mContinuationToken; + } + + void onError(Throwable t) { + mTaskInfo.getMdSync().onLoadRequestError(mTaskInfo.getId(), mId, t); + } + + @Override + public int compareTo(LoadRequest o) { + // First compare the directory load id + int baseTaskCmp; + switch (o.mTaskInfo.getLoadByDirectory()) { + case SINGLE_LISTING: + return Long.compare(mId, o.mId); + case DFS: + baseTaskCmp = Long.compare(o.mBatchSetId, mBatchSetId); + break; + default: + baseTaskCmp = Long.compare(mBatchSetId, o.mBatchSetId); + break; + } + if (baseTaskCmp != 0) { + return baseTaskCmp; + } + // then compare the base id + return Long.compare(mId, o.mId); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof LoadRequest) { + return compareTo((LoadRequest) obj) == 0; + } + return false; + } + + @Override + public int hashCode() { + // fix find bugs + return super.hashCode(); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequestExecutor.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequestExecutor.java new file mode 100644 index 000000000000..fa0e2a06de28 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequestExecutor.java @@ -0,0 +1,271 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import static java.util.concurrent.TimeUnit.NANOSECONDS; + +import alluxio.Constants; +import alluxio.collections.ConcurrentHashSet; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsLoadResult; +import alluxio.util.logging.SamplingLogger; + +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Map; +import java.util.Optional; +import java.util.PriorityQueue; +import java.util.Set; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicInteger; +import javax.annotation.Nullable; + +class LoadRequestExecutor implements Closeable { + private static final Logger LOG = LoggerFactory.getLogger(LoadRequestExecutor.class); + private static final Logger SAMPLING_LOG = new SamplingLogger(LOG, 5L * Constants.SECOND_MS); + + /** Limit the number of running (or completed but not yet processed) load requests. 
*/
+  private final AtomicInteger mRemainingTickets;
+  private final int mMaxRunning;
+
+  private final Map<Long, PathLoaderTask> mPathLoaderTasks = new ConcurrentHashMap<>();
+  // Loader tasks with pending loads
+  private final Set<Long> mPathLoaderTasksWithPendingLoads = new ConcurrentHashSet<>();
+  // Same as above, except ordered by priority
+  private final ConcurrentLinkedDeque<Long> mPathLoaderTaskQueue = new ConcurrentLinkedDeque<>();
+  // Load requests in the order they are to be processed
+  private final BlockingQueue<LoadRequest> mLoadRequests = new LinkedBlockingQueue<>();
+  // Rate limited loads that are not yet ready to be run
+  private final PriorityQueue<RateLimitedRequest> mRateLimited = new PriorityQueue<>();
+
+  private final LoadResultExecutor mResultExecutor;
+
+  private final Thread mExecutor;
+
+  LoadRequestExecutor(int maxRunning, LoadResultExecutor resultExecutor) {
+    mMaxRunning = maxRunning;
+    mRemainingTickets = new AtomicInteger(maxRunning);
+    mResultExecutor = resultExecutor;
+    mExecutor = new Thread(() -> {
+      while (!Thread.interrupted()) {
+        try {
+          runNextLoadTask();
+        } catch (InterruptedException e) {
+          return;
+        }
+      }
+      LOG.info("Load request runner thread exiting");
+    }, "LoadRequestRunner");
+    mExecutor.start();
+    registerMetrics();
+  }
+
+  synchronized void addPathLoaderTask(PathLoaderTask task) {
+    long id = task.getTaskInfo().getId();
+    task.runOnPendingLoad(() -> hasNewLoadTask(id));
+    mPathLoaderTasks.put(id, task);
+    mPathLoaderTaskQueue.add(id);
+    mPathLoaderTasksWithPendingLoads.add(id);
+    notifyAll();
+  }
+
+  synchronized void hasNewLoadTask(long taskId) {
+    if (!mPathLoaderTasksWithPendingLoads.contains(taskId)) {
+      mPathLoaderTaskQueue.add(taskId);
+      mPathLoaderTasksWithPendingLoads.add(taskId);
+      notifyAll();
+    }
+  }
+
+  private void onLoadError(LoadRequest request, Throwable t) {
+    // Errors are reported on a per-attempt basis. A reported load error does not
+    // lead to a sync failure, because we retry on UFS load failures. The sync
+    // can still proceed if a following attempt succeeds.
+    // Please refer to BaseTask::getState to get the sync task state.
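+    // (Reviewer note) Each LoadRequest carries a CountingRetry(2) policy (see
+    // LoadRequest#attempt), so a failed UFS listing may be re-attempted before
+    // the error is surfaced to the sync task as fatal.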
+    if (t instanceof DefaultSyncProcess.MountPointNotFoundRuntimeException) {
+      request.getTaskInfo().getStats().reportSyncFailReason(
+          request, null, SyncFailReason.LOADING_MOUNT_POINT_DOES_NOT_EXIST, t);
+    } else {
+      request.getTaskInfo().getStats().reportSyncFailReason(
+          request, null, SyncFailReason.LOADING_UFS_IO_FAILURE, t);
+    }
+    releaseRunning();
+    request.onError(t);
+  }
+
+  private void processLoadResult(LoadRequest request, UfsLoadResult ufsLoadResult) {
+    Optional<LoadResult> loadResult = request.getTaskInfo().getMdSync()
+        .onReceiveLoadRequestOutput(request.getBaseTaskId(),
+            request.getLoadRequestId(), ufsLoadResult);
+    synchronized (this) {
+      PathLoaderTask task = mPathLoaderTasks.get(request.getBaseTaskId());
+      if (task != null && loadResult.isPresent()) {
+        LoadResult result = loadResult.get();
+        mResultExecutor.processLoadResult(result, () -> {
+          releaseRunning();
+          result.getTaskInfo().getStats().mProcessStarted.incrementAndGet();
+        }, v -> {
+          result.getTaskInfo().getStats().mProcessCompleted.incrementAndGet();
+          result.onProcessComplete(v);
+        }, result::onProcessError);
+      } else {
+        releaseRunning();
+        if (loadResult.isPresent()) {
+          LOG.debug("Got a load result for id {} with no corresponding "
+              + "path loader task", request.getBaseTaskId());
+        }
+      }
+    }
+  }
+
+  private void runNextLoadTask() throws InterruptedException {
+    // loop until there is a task ready to execute
+    synchronized (this) {
+      while ((mLoadRequests.isEmpty() || mRemainingTickets.get() == 0)
+          && (mRateLimited.isEmpty() || !mRateLimited.peek().isReady())) {
+        // check if a task is ready to run, and we have tickets remaining
+        if (mRemainingTickets.get() > 0 && !mPathLoaderTaskQueue.isEmpty()) {
+          Long nextId = mPathLoaderTaskQueue.poll();
+          if (nextId != null) {
+            checkNextLoad(nextId);
+          }
+        } else { // otherwise, sleep
+          long waitNanos = 0;
+          if (!mRateLimited.isEmpty()) {
+            waitNanos = mRateLimited.peek().getWaitTime();
+            if (waitNanos <= 0) {
+              break;
+            }
+          }
+          // wait until a rate limited task is ready, or this.notifyAll() is called
+          if (waitNanos == 0) {
+            wait();
+          } else {
+            // we only sleep if the wait time is at least 1 ms,
+            // otherwise we spin wait
+            if (waitNanos >= Constants.MS_NANO) {
+              NANOSECONDS.timedWait(this, waitNanos);
+            }
+          }
+        }
+      }
+    }
+    SAMPLING_LOG.info("Concurrently running ufs load tasks {}, tasks with pending load requests {},"
+        + " rate limited pending requests {}",
+        mMaxRunning - mRemainingTickets.get(), mPathLoaderTasks.size(), mRateLimited.size());
+    if (!mRateLimited.isEmpty() && mRateLimited.peek().isReady()) {
+      RateLimitedRequest request = mRateLimited.remove();
+      runTask(request.mTask, request.mLoadRequest);
+    } else {
+      LoadRequest nxtRequest = mLoadRequests.take();
+      PathLoaderTask task = mPathLoaderTasks.get(nxtRequest.getBaseTaskId());
+      if (task != null) {
+        Preconditions.checkState(mRemainingTickets.decrementAndGet() >= 0);
+        Optional<Long> rateLimit = task.getRateLimiter().acquire();
+        if (rateLimit.isPresent()) {
+          mRateLimited.add(new RateLimitedRequest(task, nxtRequest, rateLimit.get()));
+        } else {
+          runTask(task, nxtRequest);
+        }
+      } else {
+        LOG.debug("Got load request {} with task id {} with no corresponding task",
+            nxtRequest.getLoadRequestId(), nxtRequest.getBaseTaskId());
+      }
+    }
+  }
+
+  private synchronized void releaseRunning() {
+    mRemainingTickets.incrementAndGet();
+    notifyAll();
+  }
+
+  synchronized void onTaskComplete(long taskId) {
+    mPathLoaderTasks.remove(taskId);
+  }
+
+  private void runTask(PathLoaderTask task, LoadRequest loadRequest) {
+    try
(CloseableResource client = task.getClient()) { + @Nullable String startAfter = null; + if (loadRequest.isFirstLoad()) { + startAfter = loadRequest.getTaskInfo().getStartAfter(); + } + client.get().performListingAsync(loadRequest.getLoadPath().getPath(), + loadRequest.getContinuationToken(), startAfter, + loadRequest.getDescendantType(), loadRequest.isFirstLoad(), + ufsLoadResult -> processLoadResult(loadRequest, ufsLoadResult), + t -> onLoadError(loadRequest, t)); + } catch (Throwable t) { + onLoadError(loadRequest, t); + } + } + + private void checkNextLoad(long id) { + PathLoaderTask task = mPathLoaderTasks.get(id); + if (task == null || task.isComplete()) { + mPathLoaderTasks.remove(id); + mPathLoaderTasksWithPendingLoads.remove(id); + return; + } + Optional nxtRequest = task.getNext(); + if (nxtRequest.isPresent()) { + try { + mLoadRequests.put(nxtRequest.get()); + mPathLoaderTaskQueue.addLast(id); + } catch (InterruptedException e) { + throw new InternalRuntimeException("Not expected to block here", e); + } + } else { + mPathLoaderTasksWithPendingLoads.remove(id); + } + } + + @Override + public void close() throws IOException { + mExecutor.interrupt(); + try { + mExecutor.join(5_000); + } catch (InterruptedException e) { + LOG.debug("Interrupted while waiting for load request runner to terminate"); + } + mResultExecutor.close(); + } + + private void registerMetrics() { + MetricsSystem.registerGaugeIfAbsent( + MetricsSystem.getMetricName( + MetricKey.MASTER_METADATA_SYNC_QUEUED_LOADS.getName()), + () -> { + synchronized (this) { + int count = 0; + for (PathLoaderTask task : mPathLoaderTasks.values()) { + count += task.getPendingLoadCount(); + } + return count; + } + }); + MetricsSystem.registerGaugeIfAbsent( + MetricsSystem.getMetricName( + MetricKey.MASTER_METADATA_SYNC_RUNNING_LOADS.getName()), + () -> mMaxRunning - mRemainingTickets.get()); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResult.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResult.java new file mode 100644 index 000000000000..0287441f62de --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResult.java @@ -0,0 +1,125 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.underfs.UfsLoadResult; + +import java.util.Optional; +import javax.annotation.Nullable; + +/** + * This is the result of a single batch load from the UFS. + */ +public class LoadResult implements Comparable { + private final TaskInfo mTaskInfo; + private final AlluxioURI mBaseLoadPath; + private final UfsLoadResult mUfsLoadResult; + private final LoadRequest mLoadRequest; + private final AlluxioURI mPreviousLast; + private final boolean mIsFirstLoad; + + /** + * Creates a load result. 
+ * @param loadRequest the load request + * @param baseLoadPath the base load path + * @param taskInfo the task info + * @param previousLast the previous last load item + * @param ufsLoadResult the ufs load result + * @param isFirstLoad if the load is the first load + */ + public LoadResult( + LoadRequest loadRequest, AlluxioURI baseLoadPath, TaskInfo taskInfo, + @Nullable AlluxioURI previousLast, UfsLoadResult ufsLoadResult, + boolean isFirstLoad) { + mLoadRequest = loadRequest; + mBaseLoadPath = baseLoadPath; + mTaskInfo = taskInfo; + mUfsLoadResult = ufsLoadResult; + mPreviousLast = previousLast; + mIsFirstLoad = isFirstLoad; + } + + /** + * @return true if this is the first load + */ + public boolean isFirstLoad() { + return mIsFirstLoad; + } + + /** + * @return the last item in the previous load + */ + public Optional getPreviousLast() { + return Optional.ofNullable(mPreviousLast); + } + + /** + * @return the load path + */ + public AlluxioURI getBaseLoadPath() { + return mBaseLoadPath; + } + + /** + * @return the ufs load result + */ + public UfsLoadResult getUfsLoadResult() { + return mUfsLoadResult; + } + + /** + * @return the task info + */ + public TaskInfo getTaskInfo() { + return mTaskInfo; + } + + void onProcessComplete(SyncProcessResult result) { + mTaskInfo.getMdSync().onProcessComplete( + mTaskInfo.getId(), mLoadRequest.getLoadRequestId(), result); + } + + void onProcessError(Throwable t) { + mTaskInfo.getMdSync().onProcessError(mTaskInfo.getId(), t); + } + + /** + * @return the load request + */ + public LoadRequest getLoadRequest() { + return mLoadRequest; + } + + @Override + public int compareTo(LoadResult o) { + int idCmp = Long.compare(mTaskInfo.getId(), o.mTaskInfo.getId()); + if (idCmp != 0) { + return idCmp; + } + return mLoadRequest.compareTo(o.mLoadRequest); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof LoadResult) { + return compareTo((LoadResult) obj) == 0; + } + return false; + } + + @Override + public int hashCode() { + // fix find bugs + return super.hashCode(); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResultExecutor.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResultExecutor.java new file mode 100644 index 000000000000..c5e6ae15dbab --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResultExecutor.java @@ -0,0 +1,67 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.util.ThreadFactoryUtils; + +import java.io.Closeable; +import java.io.IOException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.function.Consumer; + +/** + * Takes {@link LoadResult} objects and processes them in an executor service. 
+ */ +class LoadResultExecutor implements Closeable { + + private final ExecutorService mExecutor; + private final UfsSyncPathCache mSyncPathCache; + private final SyncProcess mSyncProcess; + + LoadResultExecutor( + SyncProcess syncProcess, + int executorThreads, UfsSyncPathCache syncPathCache) { + mExecutor = Executors.newFixedThreadPool(executorThreads, + ThreadFactoryUtils.build("mdsync-perform-sync-%d", true)); + mSyncPathCache = syncPathCache; + mSyncProcess = syncProcess; + } + + void processLoadResult( + LoadResult result, Runnable beforeProcessing, Consumer onComplete, + Consumer onError) { + mExecutor.submit(() -> { + beforeProcessing.run(); + try { + onComplete.accept( + mSyncProcess.performSync(result, mSyncPathCache) + ); + } catch (DefaultSyncProcess.MountPointNotFoundRuntimeException e) { + result.getTaskInfo().getStats().reportSyncFailReason( + result.getLoadRequest(), result, + SyncFailReason.PROCESSING_MOUNT_POINT_DOES_NOT_EXIST, e); + onError.accept(e); + } catch (Throwable t) { + result.getTaskInfo().getStats().reportSyncFailReason( + result.getLoadRequest(), result, SyncFailReason.PROCESSING_UNKNOWN, t); + onError.accept(t); + } + }); + } + + @Override + public void close() throws IOException { + mExecutor.shutdown(); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/MetadataSyncHandler.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/MetadataSyncHandler.java new file mode 100644 index 000000000000..40ccc0c2bf52 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/MetadataSyncHandler.java @@ -0,0 +1,100 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.file.meta.InodeTree; +import alluxio.underfs.UfsLoadResult; + +import com.google.common.annotations.VisibleForTesting; + +import java.util.Optional; + +/** + * The interactions between different task processing steps is exposed through this + * standard interface in order to allow changes in the future, for example calling + * separate components over the network. + */ +public class MetadataSyncHandler { + + private final TaskTracker mTaskTracker; + @VisibleForTesting + final DefaultFileSystemMaster mFsMaster; + private final InodeTree mInodeTree; + + /** + * Creates a metadata sync kernel. 
+   * @param taskTracker the task tracker
+   * @param fsMaster the file system master
+   * @param inodeTree the inode tree
+   */
+  public MetadataSyncHandler(
+      TaskTracker taskTracker, DefaultFileSystemMaster fsMaster, InodeTree inodeTree) {
+    mTaskTracker = taskTracker;
+    mFsMaster = fsMaster;
+    mInodeTree = inodeTree;
+  }
+
+  void onLoadRequestError(long taskId, long loadId, Throwable t) {
+    mTaskTracker.getActiveTask(taskId).ifPresent(
+        task -> task.getPathLoadTask().onLoadRequestError(loadId, t));
+  }
+
+  void onFailed(long taskId, Throwable t) {
+    mTaskTracker.getActiveTask(taskId).ifPresent(task -> {
+      task.onFailed(t);
+    });
+  }
+
+  void onProcessError(long taskId, Throwable t) {
+    mTaskTracker.getActiveTask(taskId).ifPresent(task ->
+        task.getPathLoadTask().onProcessError(t));
+  }
+
+  void onEachResult(long taskId, SyncProcessResult result) {
+    mTaskTracker.getActiveTask(taskId).ifPresent(task -> task.nextCompleted(result));
+  }
+
+  void onTaskError(long taskId, Throwable t) {
+    mTaskTracker.getActiveTask(taskId).ifPresent(task -> mTaskTracker.taskError(taskId, t));
+  }
+
+  void onTaskComplete(long taskId, boolean isFile) {
+    mTaskTracker.taskComplete(taskId, isFile);
+  }
+
+  void onPathLoadComplete(long taskId, boolean isFile) {
+    mTaskTracker.getActiveTask(taskId).ifPresent(
+        task -> task.onComplete(isFile, mFsMaster, mInodeTree));
+  }
+
+  /**
+   * Loads a nested directory.
+   * @param taskId the task id
+   * @param path the load path
+   */
+  public void loadNestedDirectory(long taskId, AlluxioURI path) {
+    mTaskTracker.getActiveTask(taskId).ifPresent(
+        task -> task.getPathLoadTask().loadNestedDirectory(path));
+  }
+
+  Optional<LoadResult> onReceiveLoadRequestOutput(long taskId, long loadId, UfsLoadResult result) {
+    return mTaskTracker.getActiveTask(taskId).flatMap(task ->
+        task.getPathLoadTask().createLoadResult(loadId, result));
+  }
+
+  void onProcessComplete(long taskId, long loadRequestId, SyncProcessResult result) {
+    mTaskTracker.getActiveTask(taskId).ifPresent(task ->
+        task.getPathLoadTask().onProcessComplete(loadRequestId, result));
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/PathLoaderTask.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/PathLoaderTask.java
new file mode 100644
index 000000000000..755eb021658b
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/PathLoaderTask.java
@@ -0,0 +1,259 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+import alluxio.AlluxioURI;
+import alluxio.file.options.DescendantType;
+import alluxio.file.options.DirectoryLoadType;
+import alluxio.metrics.MetricKey;
+import alluxio.metrics.MetricsSystem;
+import alluxio.resource.CloseableResource;
+import alluxio.underfs.UfsClient;
+import alluxio.underfs.UfsLoadResult;
+import alluxio.util.RateLimiter;
+
+import com.codahale.metrics.Counter;
+import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Optional;
+import java.util.concurrent.PriorityBlockingQueue;
+import java.util.function.Function;
+import javax.annotation.Nullable;
+
+/**
+ * This is the task for handling the loading of a path from the UFS.
+ * It will consist of at least one load request.
+ */
+public class PathLoaderTask {
+  private static final Logger LOG = LoggerFactory.getLogger(PathLoaderTask.class);
+
+  public static final Counter PROCESS_FAIL_COUNT
+      = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_PROCESSING_FAILED.getName());
+  public static final Counter LOAD_FAIL_COUNT
+      = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_LOADS_FAILED.getName());
+
+  /**
+   * All load requests that are ready, but have not yet started executing.
+   * This must be concurrent safe, as other threads will poll it to get the
+   * next load request.
+   */
+  private final PriorityBlockingQueue<LoadRequest> mNextLoad;
+  /**
+   * True when the task is completed. Must be volatile, as other threads
+   * will access it to check if they should stop polling {@link PathLoaderTask#mNextLoad}.
+   */
+  private volatile boolean mCompleted = false;
+  /**
+   * These are all running (or ready to be run) load requests.
+   */
+  private final HashMap<Long, LoadRequest> mRunningLoads = new HashMap<>();
+  /**
+   * The load id that starts each load (where a load is a set of multiple load batches until
+   * a batch is not truncated) is stored here until the request that truncates this load
+   * is completed.
+   */
+  private final HashSet<Long> mTruncatedLoads = new HashSet<>();
+  private final TaskInfo mTaskInfo;
+  private long mNxtLoadId = 0;
+  private Runnable mRunOnPendingLoad;
+  private final RateLimiter mRateLimiter;
+
+  private final Function<AlluxioURI, CloseableResource<UfsClient>> mClientSupplier;
+
+  private DescendantType computeDescendantType() {
+    if (mTaskInfo.getDescendantType() == DescendantType.ALL
+        && mTaskInfo.getLoadByDirectory() != DirectoryLoadType.SINGLE_LISTING) {
+      return DescendantType.ONE;
+    }
+    return mTaskInfo.getDescendantType();
+  }
+
+  /**
+   * Create a new PathLoaderTask.
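+   * <p>Illustrative sketch only, not part of this change ({@code taskInfo} and
+   * {@code clientSupplier} are assumed to be provided by the caller):
+   * <pre>{@code
+   * // a fresh sync typically starts with a null continuation token
+   * PathLoaderTask task = new PathLoaderTask(taskInfo, null, clientSupplier);
+   * Optional<LoadRequest> next = task.getNext(); // poll the first load request
+   * }</pre>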
+   * @param taskInfo task info
+   * @param continuationToken token
+   * @param clientSupplier the client supplier
+   */
+  public PathLoaderTask(
+      TaskInfo taskInfo, @Nullable String continuationToken,
+      Function<AlluxioURI, CloseableResource<UfsClient>> clientSupplier) {
+    mTaskInfo = taskInfo;
+    final long loadId = mNxtLoadId++;
+    // the first load request will get a GetStatus check on the path;
+    // the following loads will be listings
+    LoadRequest firstRequest = new LoadRequest(loadId, loadId, mTaskInfo, mTaskInfo.getBasePath(),
+        continuationToken, null, computeDescendantType(), true);
+    mNextLoad = new PriorityBlockingQueue<>();
+    addLoadRequest(firstRequest, true);
+    mClientSupplier = clientSupplier;
+    try (CloseableResource<UfsClient> client = mClientSupplier.apply(mTaskInfo.getBasePath())) {
+      mRateLimiter = client.get().getRateLimiter();
+    }
+  }
+
+  RateLimiter getRateLimiter() {
+    return mRateLimiter;
+  }
+
+  boolean isComplete() {
+    return mCompleted;
+  }
+
+  TaskInfo getTaskInfo() {
+    return mTaskInfo;
+  }
+
+  CloseableResource<UfsClient> getClient() {
+    return mClientSupplier.apply(mTaskInfo.getBasePath());
+  }
+
+  synchronized void runOnPendingLoad(Runnable toRun) {
+    mRunOnPendingLoad = toRun;
+  }
+
+  synchronized Optional<LoadResult> createLoadResult(
+      long requestId, UfsLoadResult ufsLoadResult) {
+    if (mCompleted) {
+      return Optional.empty();
+    }
+    LoadRequest originalRequest = mRunningLoads.get(requestId);
+    if (originalRequest == null) {
+      LOG.debug("Received a load result for task {} for a load that was already "
+              + "removed with id {}",
+          mTaskInfo, requestId);
+      return Optional.empty();
+    }
+    TaskStats stats = mTaskInfo.getStats();
+    stats.gotBatch(ufsLoadResult.getItemsCount());
+    if (originalRequest.isFirstLoad() && ufsLoadResult.isFirstFile()) {
+      stats.setFirstLoadFile();
+    }
+    // If truncated, need to submit a new task for the next set of items
+    // unless descendant type is none
+    boolean shouldLoadMore = originalRequest.getDescendantType() != DescendantType.NONE
+        && ufsLoadResult.isTruncated();
+    if (shouldLoadMore) {
+      final long loadId = mNxtLoadId++;
+      addLoadRequest(new LoadRequest(loadId, originalRequest.getBatchSetId(), mTaskInfo,
+              originalRequest.getLoadPath(), ufsLoadResult.getContinuationToken(),
+              ufsLoadResult.getLastItem().orElse(null),
+              computeDescendantType(), false),
+          false);
+    }
+    return Optional.of(new LoadResult(originalRequest, originalRequest.getLoadPath(),
+        mTaskInfo, originalRequest.getPreviousLoadLast().orElse(null),
+        ufsLoadResult, originalRequest.isFirstLoad()));
+  }
+
+  void loadNestedDirectory(AlluxioURI path) {
+    // If we are loading by directory, then we must create a new load task on each
+    // directory traversed
+    synchronized (this) {
+      final long loadId = mNxtLoadId++;
+      addLoadRequest(new LoadRequest(loadId, loadId, mTaskInfo, path,
+          null, null, computeDescendantType(), false), true);
+    }
+  }
+
+  private void addLoadRequest(LoadRequest loadRequest, boolean isFirstForPath) {
+    mRunningLoads.put(loadRequest.getLoadRequestId(), loadRequest);
+    mNextLoad.add(loadRequest);
+    if (isFirstForPath) {
+      mTruncatedLoads.add(loadRequest.getBatchSetId());
+    }
+    if (mRunOnPendingLoad != null) {
+      mRunOnPendingLoad.run();
+    }
+  }
+
+  /**
+   * This should be called when the load request with the given id has finished
+   * processing by the metadata sync.
+   * @param loadRequestId the id of the finished load request
+   * @param result the result of processing the load request
+   */
+  void onProcessComplete(long loadRequestId, SyncProcessResult result) {
+    mTaskInfo.getMdSync().onEachResult(mTaskInfo.getId(), result);
+    boolean completed = false;
+    synchronized (this) {
+      LoadRequest request = mRunningLoads.remove(loadRequestId);
+      if (request != null && !result.isTruncated()) {
+        Preconditions.checkState(mTruncatedLoads.remove(request.getBatchSetId()),
+            "load request %s finished, without finding the load %s that started the batch loading",
+            loadRequestId, request.getBatchSetId());
+      }
+      if (mTruncatedLoads.isEmpty() && mRunningLoads.isEmpty()) {
+        // all sets of loads have finished
+        completed = true;
+        mCompleted = true;
+      }
+    }
+    if (completed) {
+      mTaskInfo.getMdSync().onPathLoadComplete(mTaskInfo.getId(),
+          result.rootPathIsFile());
+    }
+  }
+
+  synchronized void onProcessError(Throwable t) {
+    PROCESS_FAIL_COUNT.inc();
+    // If there is a processing error then we fail the entire task
+    mTaskInfo.getStats().setProcessFailed();
+    mCompleted = true;
+    mTaskInfo.getMdSync().onFailed(mTaskInfo.getId(), t);
+  }
+
+  synchronized void onLoadRequestError(long id, Throwable t) {
+    LOAD_FAIL_COUNT.inc();
+    mTaskInfo.getStats().gotLoadError();
+    if (mCompleted) {
+      LOG.debug("Received a load error for task {} with id {} after the task was completed",
+          mTaskInfo, id);
+      return;
+    }
+    LoadRequest load = mRunningLoads.get(id);
+    if (load == null) {
+      LOG.debug("Received a load error for task {} for a load that was already "
+              + "removed with id {}",
+          mTaskInfo, id);
+      return;
+    }
+    if (load.attempt()) {
+      LOG.debug("Rescheduling retry of load on path {}, with id {}, with continuation token {} "
+              + "after error {}",
+          mTaskInfo, load.getLoadRequestId(), load.getContinuationToken(), t);
+      addLoadRequest(load, false);
+    } else {
+      LOG.warn("Path loader task load failed on path {}, "
+              + "with id {}, with continuation token {}, after error {}",
+          mTaskInfo, load.getLoadRequestId(), load.getContinuationToken(), t);
+      mCompleted = true;
+      mTaskInfo.getStats().setLoadFailed();
+      mTaskInfo.getMdSync().onFailed(mTaskInfo.getId(), t);
+    }
+  }
+
+  synchronized void cancel() {
+    LOG.debug("Canceling load task on path {}", mTaskInfo);
+    mCompleted = true;
+  }
+
+  Optional<LoadRequest> getNext() {
+    return Optional.ofNullable(mNextLoad.poll());
+  }
+
+  int getPendingLoadCount() {
+    return mNextLoad.size();
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/PathSequence.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/PathSequence.java
new file mode 100644
index 000000000000..8882099f75f7
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/PathSequence.java
@@ -0,0 +1,59 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+import alluxio.AlluxioURI;
+
+import java.util.Objects;
+
+/**
+ * A path sequence.
+ */
+public class PathSequence {
+  private final AlluxioURI mStart;
+  private final AlluxioURI mEnd;
+
+  /**
+   * Creates a path sequence.
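+   * <p>Illustrative only, not part of this change: for example,
+   * {@code new PathSequence(new AlluxioURI("/a"), new AlluxioURI("/b"))} can
+   * describe the range of paths loaded in a single batch, from {@code /a}
+   * through {@code /b}.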
+   * @param start the start path
+   * @param end the end path
+   */
+  public PathSequence(AlluxioURI start, AlluxioURI end) {
+    mStart = start;
+    mEnd = end;
+  }
+
+  AlluxioURI getStart() {
+    return mStart;
+  }
+
+  AlluxioURI getEnd() {
+    return mEnd;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    PathSequence that = (PathSequence) o;
+    return Objects.equals(mStart, that.mStart) && Objects.equals(mEnd, that.mEnd);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(mStart, mEnd);
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/PathWaiter.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/PathWaiter.java
new file mode 100644
index 000000000000..106962f4ca35
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/PathWaiter.java
@@ -0,0 +1,30 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+import alluxio.AlluxioURI;
+
+interface PathWaiter {
+
+  /**
+   * The calling thread will be blocked until the given path has been synced.
+   * @param path the path to sync
+   * @return true if the sync on the path was successful, false otherwise
+   */
+  boolean waitForSync(AlluxioURI path);
+
+  /**
+   * Called on each batch of results that has completed processing.
+   * @param completed the completed results
+   */
+  void nextCompleted(SyncProcessResult completed);
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/RateLimitedRequest.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/RateLimitedRequest.java
new file mode 100644
index 000000000000..eb838375e6a1
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/RateLimitedRequest.java
@@ -0,0 +1,60 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+import com.google.common.base.Preconditions;
+
+import java.util.Objects;
+
+class RateLimitedRequest implements Comparable<RateLimitedRequest> {
+
+  PathLoaderTask mTask;
+  LoadRequest mLoadRequest;
+  long mPermit;
+
+  RateLimitedRequest(PathLoaderTask task, LoadRequest loadRequest, long permit) {
+    mTask = Preconditions.checkNotNull(task);
+    mLoadRequest = Preconditions.checkNotNull(loadRequest);
+    mPermit = permit;
+  }
+
+  public boolean isReady() {
+    return mTask.getRateLimiter().getWaitTimeNanos(mPermit) <= 0;
+  }
+
+  public long getWaitTime() {
+    return mTask.getRateLimiter().getWaitTimeNanos(mPermit);
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    RateLimitedRequest that = (RateLimitedRequest) o;
+    return mPermit == that.mPermit && mTask.equals(that.mTask)
+        && mLoadRequest.equals(that.mLoadRequest);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(mTask, mLoadRequest, mPermit);
+  }
+
+  @Override
+  public int compareTo(RateLimitedRequest o) {
+    return Long.compare(mPermit, o.mPermit);
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncFailReason.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncFailReason.java
new file mode 100644
index 000000000000..2a1c9f4828dd
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncFailReason.java
@@ -0,0 +1,28 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+/**
+ * The metadata sync fail reason.
+ */
+public enum SyncFailReason {
+  UNKNOWN,
+  UNSUPPORTED,
+
+  LOADING_UFS_IO_FAILURE,
+  LOADING_MOUNT_POINT_DOES_NOT_EXIST,
+
+  PROCESSING_UNKNOWN,
+  PROCESSING_CONCURRENT_UPDATE_DURING_SYNC,
+  PROCESSING_FILE_DOES_NOT_EXIST,
+  PROCESSING_MOUNT_POINT_DOES_NOT_EXIST,
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncOperation.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncOperation.java
new file mode 100644
index 000000000000..2b4deadb0490
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncOperation.java
@@ -0,0 +1,80 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+import com.codahale.metrics.Counter;
+
+/**
+ * The metadata sync operations.
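+ * <p>Hedged usage sketch (illustrative, not part of this change): each
+ * operation is bound to a stable integer value, so per-operation stats can be
+ * kept in a plain array indexed by value, e.g.
+ * <pre>{@code
+ * long[] counts = new long[SyncOperation.values().length];
+ * counts[SyncOperation.CREATE.getValue()]++;       // record one create
+ * SyncOperation op = SyncOperation.fromInteger(1); // decodes back to CREATE
+ * }</pre>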
+ */ +public enum SyncOperation { + // Compared but not updated + NOOP(0, SyncOperationMetrics.NOOP_COUNT), + CREATE(1, SyncOperationMetrics.CREATE_COUNT), + DELETE(2, SyncOperationMetrics.DELETE_COUNT), + // Deleted then created due to the changed file data + RECREATE(3, SyncOperationMetrics.RECREATED_COUNT), + // Metadata updated + UPDATE(4, SyncOperationMetrics.UPDATE_COUNT), + SKIPPED_DUE_TO_CONCURRENT_MODIFICATION(5, SyncOperationMetrics.SKIP_CONCURRENT_UPDATE_COUNT), + SKIPPED_ON_MOUNT_POINT(6, SyncOperationMetrics.SKIP_MOUNT_POINT_COUNT), + SKIPPED_NON_PERSISTED(7, SyncOperationMetrics.SKIPPED_NON_PERSISTED_COUNT); + + private final int mValue; + private final Counter mCounter; + + SyncOperation(int value, Counter counter) { + mValue = value; + mCounter = counter; + } + + /** + * @param value the value + * @return the enum of the value + */ + public static SyncOperation fromInteger(int value) { + switch (value) { + case 0: + return NOOP; + case 1: + return CREATE; + case 2: + return DELETE; + case 3: + return RECREATE; + case 4: + return UPDATE; + case 5: + return SKIPPED_DUE_TO_CONCURRENT_MODIFICATION; + case 6: + return SKIPPED_ON_MOUNT_POINT; + case 7: + return SKIPPED_NON_PERSISTED; + default: + throw new IllegalArgumentException("Invalid SyncOperation value: " + value); + } + } + + /** + * @return the value + */ + public int getValue() { + return mValue; + } + + /** + * @return the metric counter + */ + public Counter getCounter() { + return mCounter; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncOperationMetrics.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncOperationMetrics.java new file mode 100644 index 000000000000..37317f6737db --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncOperationMetrics.java @@ -0,0 +1,40 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; + +import com.codahale.metrics.Counter; + +/** + * Sync operation metrics. 
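+ * <p>These counters back {@link SyncOperation#getCounter()}; a successful
+ * operation is typically recorded as
+ * {@code SyncOperation.CREATE.getCounter().inc(count)} (illustrative note,
+ * not part of this change).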
+ */ +public class SyncOperationMetrics { + public static final Counter CREATE_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_CREATED.getName()); + public static final Counter DELETE_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_DELETED.getName()); + public static final Counter RECREATED_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_RECREATED.getName()); + public static final Counter UPDATE_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_UPDATED.getName()); + public static final Counter SKIP_CONCURRENT_UPDATE_COUNT + = MetricsSystem.counter( + MetricKey.MASTER_METADATA_SYNC_FILES_SKIPPED_CONCURRENT_UPDATE.getName()); + public static final Counter SKIP_MOUNT_POINT_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_SKIPPED_MOUNT_POINT.getName()); + public static final Counter NOOP_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_NOOP.getName()); + public static final Counter SKIPPED_NON_PERSISTED_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_SKIPPED_NON_PERSISTED.getName()); +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcess.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcess.java new file mode 100644 index 000000000000..280fbf1809f2 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcess.java @@ -0,0 +1,29 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.master.file.meta.UfsSyncPathCache; + +/** + * The sync process interfaces. + */ +public interface SyncProcess { + /** + * Performs a sync. + * @param loadResult the UFS load result + * @param syncPathCache the sync path cache for updating the last sync time + * @return the sync process result + */ + SyncProcessResult performSync( + LoadResult loadResult, UfsSyncPathCache syncPathCache) throws Throwable; +} + diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcessContext.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcessContext.java new file mode 100644 index 000000000000..b5bc9c321a1b --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcessContext.java @@ -0,0 +1,273 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */
+
+package alluxio.master.file.mdsync;
+
+import alluxio.AlluxioURI;
+import alluxio.collections.ConcurrentHashSet;
+import alluxio.file.options.DescendantType;
+import alluxio.grpc.FileSystemMasterCommonPOptions;
+import alluxio.master.file.BlockDeletionContext;
+import alluxio.master.file.FileSystemJournalEntryMerger;
+import alluxio.master.file.RpcContext;
+import alluxio.master.file.contexts.OperationContext;
+import alluxio.master.file.meta.UfsAbsentPathCache;
+import alluxio.master.journal.FileSystemMergeJournalContext;
+import alluxio.master.journal.MetadataSyncMergeJournalContext;
+
+import com.google.common.base.Preconditions;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Set;
+
+/**
+ * The context for the metadata sync processing.
+ */
+public class SyncProcessContext implements Closeable {
+  private final DescendantType mDescendantType;
+  private final MetadataSyncRpcContext mRpcContext;
+  private final RpcContext mBaseRpcContext;
+  private final boolean mAllowConcurrentModification;
+  private final FileSystemMasterCommonPOptions mCommonOptions;
+  private final Set<AlluxioURI> mDirectoriesToUpdateAbsentCache = new ConcurrentHashSet<>();
+  private final TaskInfo mTaskInfo;
+  private final LoadResult mLoadResult;
+
+  /**
+   * Creates a metadata sync context.
+   *
+   * @param loadResult the load UFS result
+   * @param baseRpcContext the base rpc context
+   * @param rpcContext the metadata sync rpc context
+   * @param commonOptions the common options for TTL configurations
+   * @param allowConcurrentModification whether concurrent modifications are allowed
+   */
+  private SyncProcessContext(
+      LoadResult loadResult, RpcContext baseRpcContext, MetadataSyncRpcContext rpcContext,
+      FileSystemMasterCommonPOptions commonOptions,
+      boolean allowConcurrentModification
+  ) {
+    mDescendantType = loadResult.getLoadRequest().getDescendantType();
+    mRpcContext = rpcContext;
+    mBaseRpcContext = baseRpcContext;
+    mCommonOptions = commonOptions;
+    mAllowConcurrentModification = allowConcurrentModification;
+    mTaskInfo = loadResult.getTaskInfo();
+    mLoadResult = loadResult;
+  }
+
+  /**
+   * @return the descendant type of the sync
+   *         NONE -> only syncs the inode itself
+   *         ONE -> syncs the inode and its direct children
+   *         ALL -> recursively syncs a directory
+   */
+  public DescendantType getDescendantType() {
+    return mDescendantType;
+  }
+
+  /**
+   * During the sync, the inodes might be updated by other requests concurrently, which makes
+   * the sync operation stale. If concurrent modification is allowed, these inodes will be
+   * skipped; otherwise, the sync will fail.
+   *
+   * @return true if concurrent modification is allowed, false otherwise
+   */
+  public boolean isConcurrentModificationAllowed() {
+    return mAllowConcurrentModification;
+  }
+
+  /**
+   * @return if the sync is a recursive sync
+   */
+  public boolean isRecursive() {
+    return mDescendantType == DescendantType.ALL;
+  }
+
+  /**
+   * @return the rpc context
+   */
+  public MetadataSyncRpcContext getRpcContext() {
+    return mRpcContext;
+  }
+
+  /**
+   * @return the metadata sync journal context
+   */
+  public MetadataSyncMergeJournalContext getMetadataSyncJournalContext() {
+    return mRpcContext.getJournalContext();
+  }
+
+  /**
+   * @return the common options
+   */
+  public FileSystemMasterCommonPOptions getCommonOptions() {
+    return mCommonOptions;
+  }
+
+  /**
+   * Adds a directory whose direct-children-loaded flag should be updated when the sync is done.
+   *
+   * @param path the path
+   */
+  public void addDirectoriesToUpdateIsChildrenLoaded(AlluxioURI path) {
+    mTaskInfo.addPathToUpdateDirectChildrenLoaded(path);
+  }
+
+  /**
+   * Adds a directory that exists and needs to update the absent cache later.
+   * @param path the path
+   */
+  public void addDirectoriesToUpdateAbsentCache(AlluxioURI path) {
+    mDirectoriesToUpdateAbsentCache.add(path);
+  }
+
+  /**
+   * Updates the absent cache, marking the added directories as existing.
+   * @param ufsAbsentPathCache the absent cache
+   */
+  public void updateAbsentCache(UfsAbsentPathCache ufsAbsentPathCache) {
+    for (AlluxioURI uri : mDirectoriesToUpdateAbsentCache) {
+      ufsAbsentPathCache.processExisting(uri);
+    }
+  }
+
+  /**
+   * Reports the completion of a successful sync operation.
+   *
+   * @param operation the operation
+   */
+  public void reportSyncOperationSuccess(SyncOperation operation) {
+    reportSyncOperationSuccess(operation, 1);
+  }
+
+  /**
+   * Reports the completion of a successful sync operation.
+   *
+   * @param operation the operation
+   * @param count the number of successes
+   */
+  public void reportSyncOperationSuccess(SyncOperation operation, long count) {
+    operation.getCounter().inc(count);
+    mTaskInfo.getStats().reportSyncOperationSuccess(operation, count);
+  }
+
+  /**
+   * Reports a fail reason leading to the sync failure.
+   *
+   * @param reason the reason
+   * @param t the throwable
+   */
+  public void reportSyncFailReason(SyncFailReason reason, Throwable t) {
+    mTaskInfo.getStats().reportSyncFailReason(mLoadResult.getLoadRequest(), mLoadResult, reason, t);
+  }
+
+  /**
+   * @return the task info
+   */
+  public TaskInfo getTaskInfo() {
+    return mTaskInfo;
+  }
+
+  @Override
+  public void close() throws IOException {
+    mRpcContext.close();
+    mBaseRpcContext.close();
+  }
+
+  static class MetadataSyncRpcContext extends RpcContext {
+    public MetadataSyncRpcContext(
+        BlockDeletionContext blockDeleter, MetadataSyncMergeJournalContext journalContext,
+        OperationContext operationContext) {
+      super(blockDeleter, journalContext, operationContext);
+    }
+
+    @Override
+    public MetadataSyncMergeJournalContext getJournalContext() {
+      return (MetadataSyncMergeJournalContext) super.getJournalContext();
+    }
+  }
+
+  /**
+   * A builder for {@link SyncProcessContext}.
+   */
+  public static class Builder {
+    private LoadResult mLoadResult;
+    private MetadataSyncRpcContext mRpcContext;
+    private RpcContext mBaseRpcContext;
+    private FileSystemMasterCommonPOptions mCommonOptions = DefaultSyncProcess.NO_TTL_OPTION;
+    private boolean mAllowConcurrentModification = true;
+
+    /**
+     * Creates a builder.
+     *
+     * @param rpcContext the rpc context
+     * @param loadResult the load UFS result
+     * @return a new builder
+     */
+    public static Builder builder(RpcContext rpcContext, LoadResult loadResult) {
+      Preconditions.checkState(
+          !(rpcContext.getJournalContext() instanceof FileSystemMergeJournalContext));
+      Builder builder = new Builder();
+      builder.mLoadResult = loadResult;
+      /*
+       * Wrap the journal context with a MetadataSyncMergeJournalContext, which behaves
+       * differently in:
+       * 1. the journals are merged and stay in the context until it gets flushed
+       * 2. when close() or flush() are called, the journal does not trigger a hard flush
+       * that commits the journals; instead, it only adds the journals to the async journal writer.
+       * During the metadata sync process, we are creating/updating many files, but we don't want
+       * to hard flush journals on every inode update.
+       */
+      builder.mBaseRpcContext = rpcContext;
+      builder.mRpcContext = new MetadataSyncRpcContext(rpcContext.getBlockDeletionContext(),
+          new MetadataSyncMergeJournalContext(rpcContext.getJournalContext(),
+              new FileSystemJournalEntryMerger()), rpcContext.getOperationContext());
+      return builder;
+    }
+
+    /**
+     * @param rpcContext the rpc context
+     * @return builder
+     */
+    public Builder setRpcContext(MetadataSyncRpcContext rpcContext) {
+      mRpcContext = rpcContext;
+      return this;
+    }
+
+    /**
+     * @param commonOptions the common options
+     * @return builder
+     */
+    public Builder setCommonOptions(FileSystemMasterCommonPOptions commonOptions) {
+      mCommonOptions = commonOptions;
+      return this;
+    }
+
+    /**
+     * @param allowModification whether concurrent modification is allowed during the sync
+     * @return the builder
+     */
+    public Builder setAllowModification(boolean allowModification) {
+      mAllowConcurrentModification = allowModification;
+      return this;
+    }
+
+    /**
+     * @return the built metadata sync context
+     */
+    public SyncProcessContext build() {
+      return new SyncProcessContext(
+          mLoadResult, mBaseRpcContext, mRpcContext, mCommonOptions,
+          mAllowConcurrentModification);
+    }
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcessResult.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcessResult.java
new file mode 100644
index 000000000000..315ae51cd7c2
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcessResult.java
@@ -0,0 +1,84 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+import alluxio.AlluxioURI;
+
+import java.util.Optional;
+import javax.annotation.Nullable;
+
+/**
+ * This is the result of performing the metadata sync in Alluxio.
+ */
+public class SyncProcessResult {
+
+  private final AlluxioURI mBaseLoadPath;
+  private final TaskInfo mTaskInfo;
+  private final PathSequence mLoaded;
+  private final boolean mIsTruncated;
+  private final boolean mRootPathIsFile;
+
+  /**
+   * Constructs an instance of {@link SyncProcessResult}.
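+   * <p>Illustrative sketch only, not part of this change (the surrounding
+   * names are hypothetical): a truncated directory listing batch might be
+   * reported as
+   * <pre>{@code
+   * boolean isTruncated = true;   // more batches follow
+   * boolean rootPathIsFile = false;
+   * new SyncProcessResult(taskInfo, baseLoadPath,
+   *     new PathSequence(firstLoaded, lastLoaded), isTruncated, rootPathIsFile);
+   * }</pre>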
+   *
+   * @param taskInfo the task info
+   * @param baseLoadPath the base load path
+   * @param loaded the path sequence
+   * @param isTruncated whether the result is truncated or not
+   * @param rootPathIsFile whether the root path is a file or not
+   */
+  public SyncProcessResult(
+      TaskInfo taskInfo, AlluxioURI baseLoadPath,
+      @Nullable PathSequence loaded, boolean isTruncated,
+      boolean rootPathIsFile) {
+    mRootPathIsFile = rootPathIsFile;
+    mBaseLoadPath = baseLoadPath;
+    mTaskInfo = taskInfo;
+    mLoaded = loaded;
+    mIsTruncated = isTruncated;
+  }
+
+  /**
+   * @return true if the root path is a file, false otherwise
+   */
+  public boolean rootPathIsFile() {
+    return mRootPathIsFile;
+  }
+
+  /**
+   * @return the base load path
+   */
+  public AlluxioURI getBaseLoadPath() {
+    return mBaseLoadPath;
+  }
+
+  /**
+   * @return true if the result is truncated, false otherwise
+   */
+  public boolean isTruncated() {
+    return mIsTruncated;
+  }
+
+  /**
+   * @return Optional containing the loaded path sequence
+   */
+  public Optional<PathSequence> getLoaded() {
+    return Optional.ofNullable(mLoaded);
+  }
+
+  /**
+   * @return the task info
+   */
+  public TaskInfo getTaskInfo() {
+    return mTaskInfo;
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskGroup.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskGroup.java
new file mode 100644
index 000000000000..5253b46a872f
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskGroup.java
@@ -0,0 +1,113 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+import alluxio.annotation.SuppressFBWarnings;
+import alluxio.exception.runtime.DeadlineExceededRuntimeException;
+import alluxio.grpc.SyncMetadataTask;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Stopwatch;
+
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+
+/**
+ * A TaskGroup represents a set of {@link BaseTask} objects.
+ */
+public class TaskGroup {
+  @SuppressFBWarnings(value = "EI_EXPOSE_REP2")
+  private final BaseTask[] mTasks;
+  private final long mGroupId;
+
+  /**
+   * Creates a new task group.
+   * @param groupId the id for this task group
+   * @param tasks the tasks to group
+   */
+  public TaskGroup(long groupId, BaseTask... tasks) {
+    Preconditions.checkState(tasks != null && tasks.length > 0);
+    mGroupId = groupId;
+    mTasks = tasks;
+  }
+
+  /**
+   * @return the base task for this group
+   */
+  public BaseTask getBaseTask() {
+    return mTasks[0];
+  }
+
+  /**
+   * @return a stream of the tasks
+   */
+  public Stream<BaseTask> getTasks() {
+    return Arrays.stream(mTasks);
+  }
+
+  /**
+   * @return the task count
+   */
+  public int getTaskCount() {
+    return mTasks.length;
+  }
+
+  /**
+   * @return true if all tasks succeeded
+   */
+  public boolean allSucceeded() {
+    return Arrays.stream(mTasks).allMatch(BaseTask::succeeded);
+  }
+
+  /**
+   * @return a stream of the tasks in protobuf format
+   */
+  public Stream<SyncMetadataTask> toProtoTasks() {
+    return getTasks().map(BaseTask::toProtoTask);
+  }
+
+  /**
+   * @return the unique group id for this task group
+   */
+  public long getGroupId() {
+    return mGroupId;
+  }
+
+  /**
+   * Waits for all the tasks to complete or until
+   * a timeout occurs. If any task fails, it will throw the
+   * error caused by the failed task.
+   * If the wait times out, a {@link DeadlineExceededRuntimeException} is thrown.
+   * @param timeoutMs the time in milliseconds to wait for the tasks
+   *                  to complete, or 0 to wait forever
+   */
+  public void waitAllComplete(long timeoutMs) throws Throwable {
+    Stopwatch sw = Stopwatch.createStarted();
+    for (BaseTask task : mTasks) {
+      task.waitComplete(getRemainingTime(sw, timeoutMs));
+    }
+  }
+
+  private static long getRemainingTime(
+      Stopwatch sw, long timeoutMs) throws DeadlineExceededRuntimeException {
+    // Endless wait
+    if (timeoutMs == 0) {
+      return 0;
+    }
+    long remaining = timeoutMs - sw.elapsed(TimeUnit.MILLISECONDS);
+    if (remaining <= 0) {
+      throw new DeadlineExceededRuntimeException("Task still running.");
+    }
+    return remaining;
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskInfo.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskInfo.java
new file mode 100644
index 000000000000..197b5092376b
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskInfo.java
@@ -0,0 +1,149 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+import alluxio.AlluxioURI;
+import alluxio.conf.path.TrieNode;
+import alluxio.file.options.DescendantType;
+import alluxio.file.options.DirectoryLoadType;
+
+import java.util.stream.Stream;
+import javax.annotation.Nullable;
+
+/**
+ * This represents the overall metadata sync task information.
+ */
+public class TaskInfo {
+  private final AlluxioURI mBasePath;
+  private final AlluxioURI mAlluxioPath;
+  private final String mStartAfter;
+  private final DescendantType mDescendantType;
+  private final long mId;
+  private final DirectoryLoadType mLoadByDirectory;
+  private final long mSyncInterval;
+  private final MetadataSyncHandler mMetadataSyncHandler;
+  private final TaskStats mStats;
+
+  private final TrieNode<AlluxioURI> mPathsToUpdateDirectChildrenLoaded = new TrieNode<>();
+
+  TaskInfo(
+      MetadataSyncHandler metadataSyncHandler,
+      AlluxioURI ufsPath, // basePath should be without the header/bucket, e.g. no s3://
+      AlluxioURI alluxioPath,
+      @Nullable String startAfter,
+      DescendantType descendantType,
+      long syncInterval,
+      DirectoryLoadType loadByDirectory,
+      long id) {
+    mBasePath = ufsPath;
+    mAlluxioPath = alluxioPath;
+    mSyncInterval = syncInterval;
+    mDescendantType = descendantType;
+    mLoadByDirectory = loadByDirectory;
+    mId = id;
+    mStartAfter = startAfter;
+    mMetadataSyncHandler = metadataSyncHandler;
+    mStats = new TaskStats();
+  }
+
+  /**
+   * @return the task stats
+   */
+  public TaskStats getStats() {
+    return mStats;
+  }
+
+  /**
+   * @return the alluxio path
+   */
+  public AlluxioURI getAlluxioPath() {
+    return mAlluxioPath;
+  }
+
+  /**
+   * @return the sync interval
+   */
+  public long getSyncInterval() {
+    return mSyncInterval;
+  }
+
+  /**
+   * @return true, if the task contains dir load tasks
+   */
+  public boolean hasDirLoadTasks() {
+    return mDescendantType == DescendantType.ALL
+        && mLoadByDirectory != DirectoryLoadType.SINGLE_LISTING;
+  }
+
+  String getStartAfter() {
+    return mStartAfter;
+  }
+
+  /**
+   * @return the metadata sync handler
+   */
+  public MetadataSyncHandler getMdSync() {
+    return mMetadataSyncHandler;
+  }
+
+  /**
+   * @return the base path
+   */
+  public AlluxioURI getBasePath() {
+    return mBasePath;
+  }
+
+  /**
+   * @return the id
+   */
+  public long getId() {
+    return mId;
+  }
+
+  /**
+   * @return the load by directory type
+   */
+  DirectoryLoadType getLoadByDirectory() {
+    return mLoadByDirectory;
+  }
+
+  /**
+   * @return the descendant type
+   */
+  public DescendantType getDescendantType() {
+    return mDescendantType;
+  }
+
+  @Override
+  public String toString() {
+    return String.format(
+        "TaskInfo{UFS path: %s, AlluxioPath: %s, Descendant Type: %s,"
+            + " Directory Load Type: %s, Id: %d}", mBasePath, mAlluxioPath,
+        mDescendantType, mLoadByDirectory, mId);
+  }
+
+  /**
+   * @return the paths that need their direct-children-loaded flag updated
+   */
+  synchronized Stream<AlluxioURI> getPathsToUpdateDirectChildrenLoaded() {
+    return mPathsToUpdateDirectChildrenLoaded.getLeafChildren("/").map(TrieNode::getValue);
+  }
+
+  /**
+   * Adds a path whose direct-children-loaded flag should be set. This call must be synchronized,
+   * as it will be called by different threads while processing tasks.
+   * @param uri the path to update direct children loaded for
+   */
+  synchronized void addPathToUpdateDirectChildrenLoaded(AlluxioURI uri) {
+    mPathsToUpdateDirectChildrenLoaded.insert(uri.getPath()).setValue(uri);
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskStats.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskStats.java
new file mode 100644
index 000000000000..e2615dc9334d
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskStats.java
@@ -0,0 +1,279 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+import alluxio.collections.Pair;
+
+import com.google.common.base.MoreObjects;
+
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import javax.annotation.Nullable;
+
+/**
+ * The metadata sync task stats.
+ */
+public class TaskStats {
+  private final AtomicInteger mBatches = new AtomicInteger();
+  private final AtomicInteger mStatuses = new AtomicInteger();
+  private final AtomicInteger mLoadErrors = new AtomicInteger();
+  private final AtomicInteger mLoadRequests = new AtomicInteger();
+  final AtomicInteger mProcessStarted = new AtomicInteger();
+  final AtomicInteger mProcessCompleted = new AtomicInteger();
+  private final AtomicLong[] mSuccessOperationCount;
+  private final Map<Long, SyncFailure> mSyncFailReasons =
+      new ConcurrentHashMap<>();
+  private volatile boolean mLoadFailed;
+  private volatile boolean mProcessFailed;
+  private volatile boolean mFirstLoadFile;
+  private volatile boolean mSyncFailed = false;
+
+  /**
+   * Creates a new task stats.
+   */
+  public TaskStats() {
+    mSuccessOperationCount = new AtomicLong[SyncOperation.values().length];
+    for (int i = 0; i < mSuccessOperationCount.length; ++i) {
+      mSuccessOperationCount[i] = new AtomicLong();
+    }
+  }
+
+  @Override
+  public String toString() {
+    MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this)
+        .add("Success op count", getSuccessOperationCountString().getSecond())
+        .add("# of batches", mBatches.get())
+        .add("# of objects loaded from UFS", mStatuses.get())
+        .add("# of load requests", mLoadRequests.get())
+        .add("# of load errors", mLoadErrors.get())
+        .add("Load failed", mLoadFailed)
+        .add("Process failed", mProcessFailed)
+        .add("First load was file", mFirstLoadFile)
+        .add("Failed load requests", mSyncFailReasons);
+    return helper.toString();
+  }
+
+  /**
+   * @return a formatted string that is displayed as the CLI command output
+   */
+  public Pair<Long, String> toReportString() {
+    Pair<Long, String> successOps = getSuccessOperationCountString();
+    MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this);
+    helper.add("Success op count", successOps.getSecond())
+        .add("# of batches", mBatches.get())
+        .add("# of objects loaded from UFS", mStatuses.get())
+        .add("# of load requests", mLoadRequests.get())
+        .add("# of load errors", mLoadErrors.get());
+    if (!mSyncFailReasons.isEmpty()) {
+      helper.add("Failed load requests", mSyncFailReasons);
+    }
+    return new Pair<>(successOps.getFirst(), helper.toString());
+  }
+
+  /**
+   * @return whether the first load was a file
+   */
+  boolean firstLoadWasFile() {
+    return mFirstLoadFile;
+  }
+
+  /**
+   * @return whether the load failed
+   */
+  boolean isLoadFailed() {
+    return mLoadFailed;
+  }
+
+  /**
+   * @return whether the processing failed
+   */
+  boolean isProcessFailed() {
+    return mProcessFailed;
+  }
+
+  int getLoadRequestCount() {
+    return mLoadRequests.get();
+  }
+
+  int getBatchCount() {
+    return mBatches.get();
+  }
+
+  /**
+   * @return the status count
+   */
+  int getStatusCount() {
+    return mStatuses.get();
+  }
+
+  int getLoadErrors() {
+    return mLoadErrors.get();
+  }
+
+  void gotBatch(int size) {
+    mBatches.incrementAndGet();
+    mStatuses.addAndGet(size);
+  }
+
+  void gotLoadRequest() {
+    mLoadRequests.incrementAndGet();
+  }
+
+  void gotLoadError() {
+    mLoadErrors.incrementAndGet();
+  }
+
+  void setLoadFailed() {
+    mLoadFailed = true;
+  }
+
+  void setProcessFailed() {
+    mProcessFailed = true;
+  }
+
+  void setFirstLoadFile() {
+    mFirstLoadFile = true;
+  }
+
+  /**
+   * @return the success operation counts, indexed by operation value
+   */
+  public AtomicLong[] getSuccessOperationCount() {
+    return mSuccessOperationCount;
+  }
+
+  private Pair<Long, String> getSuccessOperationCountString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("{");
+    long total = 0;
+    for (int i = 0; i < mSuccessOperationCount.length; ++i) {
+      long value = mSuccessOperationCount[i].get();
+      total += value;
+      if (value != 0) {
+        sb.append("[")
+            .append(SyncOperation.fromInteger(i))
+            .append(":")
+            .append(value)
+            .append("]");
+      }
+    }
+    sb.append("}");
+    return new Pair<>(total, sb.toString());
+  }
+
+  /**
+   * Reports the completion of a successful sync operation.
+   *
+   * @param operation the operation
+   * @param count the number of successes
+   */
+  void reportSyncOperationSuccess(SyncOperation operation, long count) {
+    mSuccessOperationCount[operation.getValue()].addAndGet(count);
+  }
+
+  /**
+   * Marks the sync as failed.
+   */
+  public void setSyncFailed() {
+    mSyncFailed = true;
+  }
+
+  /**
+   * @return if the sync failed
+   */
+  public boolean getSyncFailed() {
+    return mSyncFailed;
+  }
+
+  /**
+   * Reports a sync fail reason.
+   * @param request the load request
+   * @param loadResult the load result
+   * @param reason the sync fail reason
+   * @param t the exception
+   */
+  void reportSyncFailReason(
+      LoadRequest request, @Nullable LoadResult loadResult,
+      SyncFailReason reason, Throwable t) {
+    mSyncFailReasons.putIfAbsent(
+        request.getLoadRequestId(), new SyncFailure(request, loadResult, reason, t)
+    );
+  }
+
+  /**
+   * @return the sync fail reason map.
+   *         The key is the load request id and the value is the failure.
+   *         A reported error does not necessarily fail the sync, as we retry; this map records
+   *         all failures we ever encountered. Please refer to BaseTask::getState to get the
+   *         sync task state.
+   */
+  public Map<Long, SyncFailure> getSyncFailReasons() {
+    return mSyncFailReasons;
+  }
+
+  /**
+   * The sync failure.
+   */
+  public static class SyncFailure {
+    private final LoadRequest mLoadRequest;
+    @Nullable
+    private final LoadResult mLoadResult;
+    private final Throwable mThrowable;
+    private final SyncFailReason mFailReason;
+
+    /**
+     * Constructs an object.
+     * @param loadRequest the load request
+     * @param loadResult the load result
+     * @param failReason the fail reason
+     * @param throwable the exception
+     */
+    public SyncFailure(
+        LoadRequest loadRequest, @Nullable LoadResult loadResult,
+        SyncFailReason failReason, Throwable throwable) {
+      mLoadRequest = loadRequest;
+      mLoadResult = loadResult;
+      mThrowable = throwable;
+      mFailReason = failReason;
+    }
+
+    /**
+     * @return the sync fail reason
+     */
+    public SyncFailReason getSyncFailReason() {
+      return mFailReason;
+    }
+
+    @Override
+    public String toString() {
+      String loadFrom = "{beginning}";
+      if (mLoadRequest.getPreviousLoadLast().isPresent()) {
+        loadFrom = mLoadRequest.getPreviousLoadLast().get().toString();
+      }
+      String loadUntil = "{N/A}";
+      if (mLoadResult != null && mLoadResult.getUfsLoadResult().getLastItem().isPresent()) {
+        loadUntil = mLoadResult.getUfsLoadResult().getLastItem().get().toString();
+      }
+
+      MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this)
+          .add("LoadRequestId", mLoadRequest.getLoadRequestId())
+          .add("FailReason", mFailReason)
+          .add("DescendantType", mLoadRequest.getDescendantType())
+          .add("LoadPath", mLoadRequest.getLoadPath())
+          .add("LoadFrom", loadFrom)
+          .add("LoadUntil", loadUntil)
+          .add("Exception", mThrowable);
+      return helper.toString();
+    }
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskTracker.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskTracker.java
new file mode 100644
index 000000000000..ec521ce83aab
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskTracker.java
@@ -0,0 +1,329 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.mdsync;
+
+import alluxio.AlluxioURI;
+import alluxio.collections.Pair;
+import alluxio.conf.path.TrieNode;
+import alluxio.exception.status.NotFoundException;
+import alluxio.file.options.DescendantType;
+import alluxio.file.options.DirectoryLoadType;
+import alluxio.grpc.SyncMetadataTask;
+import alluxio.master.file.meta.UfsAbsentPathCache;
+import alluxio.master.file.meta.UfsSyncPathCache;
+import alluxio.metrics.MetricKey;
+import alluxio.metrics.MetricsSystem;
+import alluxio.resource.CloseableResource;
+import alluxio.underfs.UfsClient;
+
+import com.codahale.metrics.Counter;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Optional;
+import java.util.function.Function;
+import javax.annotation.Nullable;
+
+/**
+ * Tracks metadata sync tasks. Tasks are submitted by user RPC threads and are
+ * keyed by UFS URL.
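+ * <p>Hedged usage sketch (illustrative, not part of this change; the tracker,
+ * handler, URIs, and timeout are assumed to come from the caller):
+ * <pre>{@code
+ * BaseTask task = taskTracker.launchTaskAsync(handler, ufsUri, alluxioUri,
+ *     null, DescendantType.ALL, 0, DirectoryLoadType.SINGLE_LISTING, true);
+ * task.waitComplete(timeoutMs); // block until the sync finishes or times out
+ * }</pre>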
+ */ +public class TaskTracker implements Closeable { + private static final Logger LOG = LoggerFactory.getLogger(TaskTracker.class); + + private final TrieNode mActiveRecursiveListTasks; + private final TrieNode mActiveListTasks; + private final TrieNode mActiveStatusTasks; + private final HashMap mActiveTaskMap = new HashMap<>(); + // TODO(elega) make this a configurable property + private final Cache mFinishedTaskMap = + CacheBuilder.newBuilder().maximumSize(1000).build(); + private final LoadRequestExecutor mLoadRequestExecutor; + private final UfsSyncPathCache mSyncPathCache; + private final UfsAbsentPathCache mAbsentPathCache; + private final Function> mClientSupplier; + + public static final Counter COMPLETED_TASK_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_COMPLETED_TASKS.getName()); + public static final Counter FAILED_TASK_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FAILED_TASKS.getName()); + public static final Counter CANCELLED_TASK_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_CANCELLED_TASKS.getName()); + + private long mNxtId = 0; + + /** + * Create a new TaskTracker. + * @param executorThreads the number of threads to run the metadata sync processing + * @param maxUfsRequests the maximum number of concurrently running + * (or completed but not yet processed) Ufs requests + * @param allowConcurrentNonRecursiveList if true, non-recursive lists tasks will + * run concurrently with recursive list tasks + * @param allowConcurrentGetStatus if true, getStatus tasks will run concurrently + * with recursive list tasks + * @param syncPathCache the sync path cache + * @param absentPathCache the absent cache + * @param syncProcess the sync process + * @param clientSupplier the client supplier + */ + public TaskTracker( + int executorThreads, int maxUfsRequests, + boolean allowConcurrentGetStatus, boolean allowConcurrentNonRecursiveList, + UfsSyncPathCache syncPathCache, + UfsAbsentPathCache absentPathCache, + SyncProcess syncProcess, + Function> clientSupplier) { + LOG.info("Metadata sync executor threads {}, max concurrent ufs requests {}", + executorThreads, maxUfsRequests); + mSyncPathCache = syncPathCache; + mAbsentPathCache = absentPathCache; + mLoadRequestExecutor = new LoadRequestExecutor(maxUfsRequests, + new LoadResultExecutor(syncProcess, executorThreads, syncPathCache)); + mActiveRecursiveListTasks = new TrieNode<>(); + if (allowConcurrentNonRecursiveList) { + mActiveListTasks = new TrieNode<>(); + } else { + mActiveListTasks = mActiveRecursiveListTasks; + } + if (allowConcurrentGetStatus) { + mActiveStatusTasks = new TrieNode<>(); + } else { + mActiveStatusTasks = mActiveRecursiveListTasks; + } + mClientSupplier = clientSupplier; + registerMetrics(); + } + + /** + * @param taskId the task id + * @return the task + */ + public synchronized Optional getActiveTask(long taskId) { + return Optional.ofNullable(mActiveTaskMap.get(taskId)); + } + + /** + * @param taskId the task id + * @return the task + */ + public synchronized Optional getTaskProto(long taskId) { + BaseTask task = mActiveTaskMap.get(taskId); + if (task != null) { + return Optional.of(task.toProtoTask()); + } + return Optional.ofNullable(mFinishedTaskMap.getIfPresent(taskId)); + } + + synchronized boolean hasRunningTasks() { + return mActiveListTasks.getCommonRoots().hasNext() + || mActiveStatusTasks.getCommonRoots().hasNext() + || mActiveRecursiveListTasks.getCommonRoots().hasNext(); + } + + void taskComplete(long taskId, boolean isFile) { + synchronized 
(this) { + BaseTask baseTask = mActiveTaskMap.get(taskId); + if (baseTask != null) { + if (!baseTask.removeOnComplete()) { + mFinishedTaskMap.put(taskId, baseTask.toProtoTask()); + } + COMPLETED_TASK_COUNT.inc(); + mActiveTaskMap.remove(taskId); + LOG.debug("Task {} completed", baseTask); + mSyncPathCache.notifySyncedPath(baseTask.getTaskInfo().getBasePath(), + baseTask.getTaskInfo().getDescendantType(), baseTask.getStartTime(), + null, isFile); + if (baseTask.getTaskInfo().getStats().getStatusCount() == 0) { + mAbsentPathCache.addSinglePath(baseTask.getTaskInfo().getBasePath()); + } else { + mAbsentPathCache.processExisting(baseTask.getTaskInfo().getBasePath()); + } + TrieNode activeTasks = getActiveTasksForDescendantType( + baseTask.getTaskInfo().getDescendantType()); + Preconditions.checkNotNull(activeTasks.deleteIf( + baseTask.getTaskInfo().getBasePath().toString(), a -> true), + "task missing").setValue(null); + } else { + LOG.debug("Task with id {} completed, but was already removed", taskId); + } + } + mLoadRequestExecutor.onTaskComplete(taskId); + } + + void taskError(long taskId, Throwable t) { + synchronized (this) { + BaseTask baseTask = mActiveTaskMap.remove(taskId); + if (baseTask != null) { + FAILED_TASK_COUNT.inc(); + LOG.debug("Task {} failed with error {}", baseTask, t); + TrieNode activeTasks = getActiveTasksForDescendantType( + baseTask.getTaskInfo().getDescendantType()); + Preconditions.checkNotNull(activeTasks.deleteIf( + baseTask.getTaskInfo().getBasePath().toString(), a -> true), + "task missing").setValue(null); + if (!baseTask.removeOnComplete()) { + mFinishedTaskMap.put(taskId, baseTask.toProtoTask()); + } + } else { + LOG.debug("Task with id {} failed with error, but was already removed", taskId, t); + } + } + mLoadRequestExecutor.onTaskComplete(taskId); + } + + synchronized void cancelTasksUnderPath(AlluxioURI path) { + mActiveRecursiveListTasks.getLeafChildren(path.toString()).forEach(nxt -> + mActiveTaskMap.remove(nxt.getValue().cancel())); + mActiveListTasks.getLeafChildren(path.toString()).forEach(nxt -> + mActiveTaskMap.remove(nxt.getValue().cancel())); + mActiveStatusTasks.getLeafChildren(path.toString()).forEach(nxt -> + mActiveTaskMap.remove(nxt.getValue().cancel())); + } + + /** + * Cancels an ongoing sync task. + * @param taskId the task id + */ + public synchronized void cancelTaskById(long taskId) throws NotFoundException { + BaseTask baseTask = mActiveTaskMap.get(taskId); + if (baseTask == null) { + throw new NotFoundException("Task " + taskId + " not found or has already been canceled."); + } + if (baseTask.isCompleted().isPresent()) { + return; + } + if (!baseTask.removeOnComplete()) { + mFinishedTaskMap.put(taskId, baseTask.toProtoTask()); + } + CANCELLED_TASK_COUNT.inc(); + mActiveTaskMap.remove(taskId); + baseTask.cancel(); + TrieNode activeTasks = getActiveTasksForDescendantType( + baseTask.getTaskInfo().getDescendantType()); + Preconditions.checkNotNull(activeTasks.deleteIf( + baseTask.getTaskInfo().getBasePath().toString(), a -> true), "task missing") + .setValue(null); + } + + private TrieNode getActiveTasksForDescendantType(DescendantType depth) { + switch (depth) { + case NONE: + return mActiveStatusTasks; + case ONE: + return mActiveListTasks; + default: + return mActiveRecursiveListTasks; + } + } + + /** + * Launches a metadata sync task asynchronously with the given parameters. + * This function should be used when manually launching metadata sync tasks. 
+ * @param metadataSyncHandler the MdSync object + * @param ufsPath the ufsPath to sync + * @param alluxioPath the alluxio path matching the mounted ufsPath + * @param startAfter if the sync should start after a given internal path + * @param depth the depth of descendents to load + * @param syncInterval the sync interval + * @param loadByDirectory the load by directory type + * @param removeOnComplete if the task should be removed on complete + * @return the running task object + */ + public BaseTask launchTaskAsync( + MetadataSyncHandler metadataSyncHandler, + AlluxioURI ufsPath, AlluxioURI alluxioPath, + @Nullable String startAfter, + DescendantType depth, long syncInterval, + DirectoryLoadType loadByDirectory, + boolean removeOnComplete) { + BaseTask task; + synchronized (this) { + TrieNode activeTasks = getActiveTasksForDescendantType(depth); + task = activeTasks.getLeafChildren(ufsPath.toString()) + .map(TrieNode::getValue).filter(nxt -> nxt.pathIsCovered(ufsPath, depth)).findFirst() + .orElseGet(() -> { + TrieNode newNode = activeTasks.insert(ufsPath.toString()); + Preconditions.checkState(newNode.getValue() == null); + final long id = mNxtId++; + BaseTask newTask = BaseTask.create( + new TaskInfo(metadataSyncHandler, ufsPath, alluxioPath, startAfter, + depth, syncInterval, loadByDirectory, id), + mSyncPathCache.recordStartSync(), + mClientSupplier, + removeOnComplete); + mActiveTaskMap.put(id, newTask); + newNode.setValue(newTask); + mLoadRequestExecutor.addPathLoaderTask(newTask.getLoadTask()); + return newTask; + }); + } + return task; + } + + /** + * Launches a metadata sync task with the given parameters. + * This function should be used when traversing the tree, and the + * path being traversed is needing a sync. + * This method will not return until the initial sync path has been + * synchronized. For example if the alluxio sync path is "/mount/file" + * it will not return until "file" has been synchronized. If instead + * the path being synchronized is a directory, e.g. "/mount/directory/" + * then the function will return as soon as the first batch of items + * in the directory has been synchronized, e.g. "/mount/directory/first", + * allowing the user to start listing the file before the sync has been + * completed entirely. As the directory is traversed, this function should + * be called on each subsequent path until the sync is complete. 
+ * TODO(tcrain) integrate this in the filesystem operations traversal + * @param metadataSyncHandler the metadata sync handler + * @param ufsPath the ufsPath to sync + * @param alluxioPath the alluxio path matching the mounted ufsPath + * @param startAfter if the sync should start after a given internal path + * @param depth the depth of descendants to load + * @param syncInterval the sync interval + * @param loadByDirectory the load by directory type + * @return the running task object + */ + @VisibleForTesting + public Pair checkTask( + MetadataSyncHandler metadataSyncHandler, + AlluxioURI ufsPath, AlluxioURI alluxioPath, + @Nullable String startAfter, + DescendantType depth, long syncInterval, + DirectoryLoadType loadByDirectory) { + // TODO(elega/tcrain) This method needs to be updated to support nested sync + BaseTask task = launchTaskAsync(metadataSyncHandler, ufsPath, alluxioPath, startAfter, + depth, syncInterval, loadByDirectory, true); + return new Pair<>(task.waitForSync(ufsPath), task); + } + + @Override + public void close() throws IOException { + mLoadRequestExecutor.close(); + } + + private void registerMetrics() { + MetricsSystem.registerGaugeIfAbsent( + MetricsSystem.getMetricName( + MetricKey.MASTER_METADATA_SYNC_RUNNING_TASKS.getName()), + () -> { + synchronized (this) { + return mActiveTaskMap.size(); + } + }); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/AsyncUfsAbsentPathCache.java b/core/server/master/src/main/java/alluxio/master/file/meta/AsyncUfsAbsentPathCache.java index c69ebd10ac83..b4aa4af63c62 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/AsyncUfsAbsentPathCache.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/AsyncUfsAbsentPathCache.java @@ -82,7 +82,8 @@ public AsyncUfsAbsentPathCache(MountTable mountTable, int numThreads, Clock cloc mMountTable = mountTable; mClock = clock; mCurrentPaths = new ConcurrentHashMap<>(8, 0.95f, 8); - mCache = CacheBuilder.newBuilder().maximumSize(MAX_PATHS).recordStats().build(); + mCache = CacheBuilder.newBuilder().maximumSize(MAX_PATHS).concurrencyLevel(Configuration.getInt( + PropertyKey.MASTER_UFS_PATH_CACHE_THREADS)).recordStats().build(); /* Number of threads for the async pool.
*/ mPool = new ThreadPoolExecutor(numThreads, numThreads, THREAD_KEEP_ALIVE_SECONDS, diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeDirectoryIdGenerator.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeDirectoryIdGenerator.java index d8b5180afec6..b52a5454fe31 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/InodeDirectoryIdGenerator.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeDirectoryIdGenerator.java @@ -69,6 +69,16 @@ synchronized long getNewDirectoryId(JournalContext context) throws UnavailableEx return directoryId; } + /** + * @return the next directory id + */ + public long peekDirectoryId() { + DirectoryId directoryId = mNextDirectoryId; + long containerId = directoryId.getContainerId(); + long sequenceNumber = directoryId.getSequenceNumber(); + return BlockId.createBlockId(containerId, sequenceNumber); + } + private void initialize(JournalContext context) throws UnavailableException { if (!mInitialized) { applyAndJournal(context, toEntry(mContainerIdGenerator.getNewContainerId(), 0)); diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeIterationResult.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeIterationResult.java new file mode 100644 index 000000000000..6eb70cd41fc5 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeIterationResult.java @@ -0,0 +1,50 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.meta; + +/** + * The inode, its full path, and the locked path. + */ +public class InodeIterationResult { + private final Inode mInode; + private final LockedInodePath mLockedPath; + + /** + * Creates an instance. + * @param inode the inode + * @param lockedPath the locked path + */ + public InodeIterationResult( + Inode inode, LockedInodePath lockedPath) { + mInode = inode; + mLockedPath = lockedPath; + } + + /** + * @return the inode + */ + public Inode getInode() { + return mInode; + } + + /** + * @return the locked path + */ + public LockedInodePath getLockedPath() { + return mLockedPath; + } + + @Override + public String toString() { + return mLockedPath.getUri().toString(); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeLockManager.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeLockManager.java index a87a4be13a76..2526c03f41aa 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/InodeLockManager.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeLockManager.java @@ -163,6 +163,17 @@ public RWLockResource lockInode(InodeView inode, LockMode mode, boolean useTryLo return mInodeLocks.get(inode.getId(), mode, useTryLock); } + /** + * Acquires an inode lock using {@link Lock#lock()}. 
+ * + * @param inodeId the inode id of the inode to lock + * @param mode the mode to lock in + * @return a lock resource which must be closed to release the lock + */ + public RWLockResource lockInode(Long inodeId, LockMode mode) { + return mInodeLocks.get(inodeId, mode, false); + } + /** * Attempts to acquire an inode lock. * diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java index 327c961427d7..480e2ee6707a 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java @@ -191,7 +191,7 @@ public boolean isWrite() { /** * Class for managing the persistent state of the inode tree. All metadata changes must go - * through this class by calling mState.applyAndJournal(context, entry). + * through this class by calling {@link InodeTreePersistentState#applyAndJournal(context, entry)}. */ private final InodeTreePersistentState mState; @@ -293,9 +293,21 @@ public Map getFileSizeHistogram() { * @param dir the inode directory */ public void setDirectChildrenLoaded(Supplier context, InodeDirectory dir) { + setDirectChildrenLoaded(context, dir, true); + } + + /** + * Marks an inode directory as having its direct children loaded or not. + * + * @param context journal context supplier + * @param dir the inode directory + * @param directChildrenLoaded whether the direct children should be marked as loaded + */ + public void setDirectChildrenLoaded(Supplier context, InodeDirectory dir, + boolean directChildrenLoaded) { mState.applyAndJournal(context, UpdateInodeDirectoryEntry.newBuilder() .setId(dir.getId()) - .setDirectChildrenLoaded(true) + .setDirectChildrenLoaded(directChildrenLoaded) .build()); } @@ -862,7 +874,7 @@ public List createPath(RpcContext rpcContext, LockedInodePath inodePath, InodeDirectoryView currentInodeDirectory = ancestorInode.asDirectory(); List createdInodes = new ArrayList<>(); - if (context.isPersisted()) { + if (context.isPersisted() && context.isPersistNonExistingParentDirectories()) { // Synchronously persist directories. These inodes are already READ locked. for (Inode inode : inodePath.getInodeList()) { if (!inode.isPersisted()) { @@ -901,23 +913,34 @@ public List createPath(RpcContext rpcContext, LockedInodePath inodePath, // NOTE, we set the mode of missing ancestor directories to be the default value, rather // than inheriting the option of the final file to create, because it may not have // "execute" permission.
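(A one-line sketch of what the new three-argument setDirectChildrenLoaded overload enables; inodeTree, journalContext, and dir are placeholders. Passing false clears the children-loaded flag so a later listing reloads the directory.)

    // Hypothetical: force a reload of the directory's children on the next
    // listing; the two-argument overload keeps the old always-true behavior.
    inodeTree.setDirectChildrenLoaded(() -> journalContext, dir, false);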
- CreateDirectoryContext missingDirContext = CreateDirectoryContext.defaults(); - missingDirContext.getOptions().setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() - .setTtl(context.getTtl()).setTtlAction(context.getTtlAction())); - missingDirContext.setWriteType(context.getWriteType()); - missingDirContext.setOperationTimeMs(context.getOperationTimeMs()); - missingDirContext.setMountPoint(false); - missingDirContext.setOwner(context.getOwner()); - missingDirContext.setGroup(context.getGroup()); - if (context.getXAttr() != null - && context.getXAttrPropStrat() != null - && context.getXAttrPropStrat() == XAttrPropagationStrategy.NEW_PATHS) { - missingDirContext.setXAttr(context.getXAttr()); - } StringBuilder pathBuilder = new StringBuilder().append( String.join(AlluxioURI.SEPARATOR, Arrays.asList(pathComponents).subList(0, pathIndex)) ); + CreateDirectoryContext missingDirContext = null; + if (pathIndex < pathComponents.length - 1) { + missingDirContext = CreateDirectoryContext.defaults(); + missingDirContext.getOptions().setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() + .setTtl(context.getTtl()).setTtlAction(context.getTtlAction())); + missingDirContext.setWriteType(context.getWriteType()); + missingDirContext.setOperationTimeMs(context.getOperationTimeMs()); + missingDirContext.setMountPoint(false); + missingDirContext.setOwner(context.getOwner()); + missingDirContext.setGroup(context.getGroup()); + if (context.isMetadataLoad() && !context.isPersistNonExistingParentDirectories()) { + // If this is a metadata load, and we are not going to persist internal + // directories (i.e. adding object markers), then we mark the internal + // directories as persisted + missingDirContext.setWriteType(WriteType.THROUGH); + missingDirContext.setMissingDirFingerprint(context::getMissingDirFingerprint); + } + if (context.getXAttr() != null + && context.getXAttrPropStrat() != null + && context.getXAttrPropStrat() == XAttrPropagationStrategy.NEW_PATHS) { + missingDirContext.setXAttr(context.getXAttr()); + } + } for (int k = pathIndex; k < (pathComponents.length - 1); k++) { + assert missingDirContext != null; MutableInodeDirectory newDir = MutableInodeDirectory.create( mDirectoryIdGenerator.getNewDirectoryId(rpcContext.getJournalContext()), currentInodeDirectory.getId(), pathComponents[k], missingDirContext); @@ -940,6 +963,10 @@ public List createPath(RpcContext rpcContext, LockedInodePath inodePath, newDir.setInternalAcl(pair.getFirst()); newDir.setDefaultACL(pair.getSecond()); } + if (context.isPersisted() && !context.isPersistNonExistingParentDirectories()) { + newDir.setPersistenceState(PersistenceState.PERSISTED); + newDir.setUfsFingerprint(context.getMissingDirFingerprint()); + } String newDirPath = k == 0 ? ROOT_PATH : pathBuilder.append(AlluxioURI.SEPARATOR).append(pathComponents[k]).toString(); mState.applyAndJournal(rpcContext, newDir, @@ -949,7 +976,7 @@ public List createPath(RpcContext rpcContext, LockedInodePath inodePath, // Persist the directory *after* it exists in the inode tree. This prevents multiple // concurrent creates from trying to persist the same directory name. 
- if (context.isPersisted()) { + if (context.isPersisted() && context.isPersistNonExistingParentDirectories()) { syncPersistExistingDirectory(rpcContext, newDir, context.isMetadataLoad()); } createdInodes.add(Inode.wrap(newDir)); @@ -1007,7 +1034,14 @@ public List createPath(RpcContext rpcContext, LockedInodePath inodePath, newInode = newDir; } else if (context instanceof CreateFileContext) { CreateFileContext fileContext = (CreateFileContext) context; - MutableInodeFile newFile = MutableInodeFile.create(mContainerIdGenerator.getNewContainerId(), + final long blockContainerId; + if (fileContext.getCompleteFileInfo() != null) { + blockContainerId = fileContext.getCompleteFileInfo().getContainerId(); + } else { + blockContainerId = mContainerIdGenerator.getNewContainerId(); + } + + MutableInodeFile newFile = MutableInodeFile.create(blockContainerId, currentInodeDirectory.getId(), name, System.currentTimeMillis(), fileContext); // if the parent has a default ACL, copy that default ACL ANDed with the umask as the new diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTreePersistentState.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTreePersistentState.java index 9d87f913a05d..8585053e9dd9 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTreePersistentState.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTreePersistentState.java @@ -56,6 +56,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.nio.file.Path; @@ -68,8 +69,11 @@ import java.util.Optional; import java.util.Queue; import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; import java.util.function.Supplier; import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nullable; /** @@ -203,6 +207,13 @@ public Set getToBePersistedIds() { return Collections.unmodifiableSet(mToBePersistedIds); } + /** + * @return the list of TTL buckets for tracking inode TTLs + */ + public TtlBucketList getTtlBuckets() { + return mTtlBuckets; + } + //// /// The applyAndJournal() methods make sure the in-memory metadata state and the journal are /// BOTH updated. Any exception seen here will crash the master! 
So if an exception should be @@ -817,6 +828,15 @@ public void resetState() { mOpIdCache.invalidateAll(); } + @Override + public CompletableFuture writeToCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.allOf(Stream.of(mInodeStore, mPinnedInodeFileIds, + mReplicationLimitedFileIds, mToBePersistedIds, mTtlBuckets, mInodeCounter) + .map(journaled -> journaled.writeToCheckpoint(directory, executorService)) + .toArray(CompletableFuture[]::new)); + } + @Override public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { // mTtlBuckets must come after mInodeStore so that it can query the inode store to resolve inode @@ -825,6 +845,15 @@ public void writeToCheckpoint(OutputStream output) throws IOException, Interrupt mReplicationLimitedFileIds, mToBePersistedIds, mTtlBuckets, mInodeCounter)); } + @Override + public CompletableFuture restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.allOf(Stream.of(mInodeStore, mPinnedInodeFileIds, + mReplicationLimitedFileIds, mToBePersistedIds, mTtlBuckets, mInodeCounter) + .map(journaled -> journaled.restoreFromCheckpoint(directory, executorService)) + .toArray(CompletableFuture[]::new)); + } + @Override public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { // mTtlBuckets must come after mInodeStore so that it can query the inode store to resolve inode diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/LockedInodePath.java b/core/server/master/src/main/java/alluxio/master/file/meta/LockedInodePath.java index c9aee1e81168..649286a3350b 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/LockedInodePath.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/LockedInodePath.java @@ -20,6 +20,7 @@ import alluxio.exception.InvalidPathException; import alluxio.exception.status.UnavailableException; import alluxio.master.file.meta.InodeTree.LockPattern; +import alluxio.master.journal.FileSystemMergeJournalContext; import alluxio.master.journal.JournalContext; import alluxio.master.metastore.ReadOnlyInodeStore; import alluxio.resource.AlluxioResourceLeakDetectorFactory; @@ -85,9 +86,7 @@ public class LockedInodePath implements Closeable { @Nullable private final ResourceLeakTracker mTracker; /** To determine if we should flush the journals when lock is released or scope reduced. */ - private final boolean mMergeInodeJournals = Configuration.getBoolean( - PropertyKey.MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS - ); + private final boolean mMergeInodeJournals; /** * Keeps a reference of JournalContext and flushes it before the lock is released. @@ -159,6 +158,9 @@ public LockedInodePath(AlluxioURI uri, ReadOnlyInodeStore inodeStore, mLockList = new SimpleInodeLockList(inodeLockManager, mUseTryLock); mTracker = DETECTOR.track(this); mJournalContext = journalContext; + mMergeInodeJournals = Configuration.getBoolean( + PropertyKey.MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS + ) && mJournalContext instanceof FileSystemMergeJournalContext; } /** @@ -184,6 +186,9 @@ private LockedInodePath(AlluxioURI uri, LockedInodePath path, String[] pathCompo // So the new created LockInodePath instance must be on the same thread with // the original one and hence they will use the same JournalContext. 
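(The directory-based writeToCheckpoint/restoreFromCheckpoint methods above share one fan-out shape; a self-contained sketch of that pattern with generic names, assuming each component exposes the writeToCheckpoint(directory, executor) overload used in this patch.)

    // Fan out one checkpoint write per journaled component and complete when
    // all of them do; allOf propagates the first failure to the caller.
    static CompletableFuture<Void> checkpointAll(
        List<Journaled> components, File dir, ExecutorService svc) {
      return CompletableFuture.allOf(components.stream()
          .map(c -> c.writeToCheckpoint(dir, svc))
          .toArray(CompletableFuture[]::new));
    }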
mJournalContext = path.mJournalContext; + mMergeInodeJournals = Configuration.getBoolean( + PropertyKey.MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS + ) && mJournalContext instanceof FileSystemMergeJournalContext; } /** @@ -404,7 +409,28 @@ public LockedInodePath lockDescendant(AlluxioURI descendantUri, LockPattern lock */ public LockedInodePath lockChild(Inode child, LockPattern lockPattern) throws InvalidPathException { - return lockChild(child, lockPattern, addComponent(mPathComponents, child.getName())); + return lockChild(child, lockPattern, true); + } + + /** + * Returns a new locked inode path composed of the current path plus the child inode. + * The original locked inode path is unaffected. + * Whether the new path is traversed is controlled by the shouldTraverse parameter. + * + * On failure, all locks taken by this method will be released. + * + * @param child the child inode + * @param lockPattern the lock pattern + * @param shouldTraverse if the path should be traversed or not + * @return the new locked path + */ + public LockedInodePath lockChild(Inode child, LockPattern lockPattern, boolean shouldTraverse) + throws InvalidPathException { + return lockChild(child, lockPattern, addComponent(mPathComponents, child.getName()), + shouldTraverse); } /** @@ -418,7 +444,23 @@ public LockedInodePath lockChild(Inode child, LockPattern lockPattern) */ public LockedInodePath lockChild(Inode child, LockPattern lockPattern, String[] childComponentsHint) throws InvalidPathException { - return lockChildByName(child.getName(), lockPattern, childComponentsHint); + return lockChildByName(child.getName(), lockPattern, childComponentsHint, true); + } + + /** + * Efficient version of {@link #lockChild(Inode, LockPattern)} for when the child path + * components are already known.
+ * + * @param child the child inode + * @param lockPattern the lock pattern + * @param childComponentsHint path components for the new path + * @param shouldTraverse if the path should be traversed or not + * @return the new locked path + */ + public LockedInodePath lockChild( + Inode child, LockPattern lockPattern, String[] childComponentsHint, + boolean shouldTraverse) throws InvalidPathException { + return lockChildByName(child.getName(), lockPattern, childComponentsHint, shouldTraverse); } /** @@ -428,13 +470,16 @@ public LockedInodePath lockChild(Inode child, LockPattern lockPattern, * @param childName the name of the child inode * @param lockPattern the lock pattern * @param childComponentsHint path components for the new path + * @param shouldTraverse if the path should be traversed or not * @return the new locked path */ public LockedInodePath lockChildByName(String childName, LockPattern lockPattern, - String[] childComponentsHint) throws InvalidPathException { + String[] childComponentsHint, boolean shouldTraverse) throws InvalidPathException { LockedInodePath path = new LockedInodePath(mUri.joinUnsafe(childName), this, childComponentsHint, lockPattern, mUseTryLock); - path.traverseOrClose(); + if (shouldTraverse) { + path.traverseOrClose(); + } return path; } diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/LockingScheme.java b/core/server/master/src/main/java/alluxio/master/file/meta/LockingScheme.java index 07f64136803d..8cacbe364cf6 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/LockingScheme.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/LockingScheme.java @@ -32,13 +32,19 @@ public final class LockingScheme { private final LockPattern mDesiredLockPattern; private final SyncCheck mShouldSync; + // CHECKSTYLE.OFF: LineLengthExceed - cannot break the method link /** * Constructs a {@link LockingScheme}. * + * If possible, avoid using this constructor with shouldSync set to true; + * {@link #LockingScheme(AlluxioURI, LockPattern, FileSystemMasterCommonPOptions, UfsSyncPathCache, DescendantType)} + * is preferred in that case, so that the metadata sync deduplication feature works.
+ * * @param path the path to lock * @param desiredLockPattern the desired lock mode * @param shouldSync true if the path should be synced */ + // CHECKSTYLE.ON: LineLengthExceed public LockingScheme(AlluxioURI path, LockPattern desiredLockPattern, boolean shouldSync) { mPath = path; mDesiredLockPattern = desiredLockPattern; diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java b/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java index ee5a8858de08..1dce972be1ae 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java @@ -12,6 +12,7 @@ package alluxio.master.file.meta; import alluxio.AlluxioURI; +import alluxio.conf.Configuration; import alluxio.exception.AccessControlException; import alluxio.exception.ExceptionMessage; import alluxio.exception.FileAlreadyExistsException; @@ -38,6 +39,7 @@ import alluxio.resource.LockResource; import alluxio.underfs.UfsManager; import alluxio.underfs.UnderFileSystem; +import alluxio.underfs.UnderFileSystemConfiguration; import alluxio.util.IdUtils; import alluxio.util.io.PathUtils; @@ -95,11 +97,11 @@ public MountTable(UfsManager ufsManager, MountInfo rootMountInfo, Clock clock) { mReadLock = lock.readLock(); mWriteLock = lock.writeLock(); mUfsManager = ufsManager; - mState = new State(rootMountInfo, clock); + mState = new State(rootMountInfo, clock, mUfsManager); } /** - * Returns the underlying writelock of the MountTable. This method will be called when + * Returns the underlying write lock of the MountTable. This method will be called when * fileSystemMaster is adding a new MountPoint. * * @return the write lock of the mountTable @@ -160,7 +162,7 @@ public void addValidated(Supplier journalContext, * Verify if the given (alluxioPath, ufsPath) can be inserted into MountTable. This method is * NOT ThreadSafe. This method will not acquire any locks, so the caller MUST apply the lock * first before calling this method. 
- * @param alluxioUri the alluxio path that is about to be the mountpoint + * @param alluxioUri the alluxio path that is about to be the mount point * @param ufsUri the UFS path that is about to mount * @param mountId the mount id * @param options the mount options @@ -257,7 +259,7 @@ public boolean delete(Supplier journalContext, AlluxioURI uri, for (String mountPath : mState.getMountTable().keySet()) { try { if (PathUtils.hasPrefix(mountPath, path) && (!path.equals(mountPath))) { - LOG.warn("The path to unmount {} contains another nested mountpoint {}", + LOG.warn("The path to unmount {} contains another nested mount point {}", path, mountPath); return false; } @@ -321,13 +323,13 @@ public void update(Supplier journalContext, AlluxioURI alluxioUr public String getMountPoint(AlluxioURI uri) throws InvalidPathException { String path = uri.getPath(); String lastMount = ROOT; + List possibleMounts = PathUtils.getPossibleMountPoints(path); try (LockResource r = new LockResource(mReadLock)) { - for (Map.Entry entry : mState.getMountTable().entrySet()) { - String mount = entry.getKey(); - // we choose a new candidate path if the previous candidatepath is a prefix - // of the current alluxioPath and the alluxioPath is a prefix of the path - if (!mount.equals(ROOT) && PathUtils.hasPrefix(path, mount) - && lastMount.length() < mount.length()) { + Map mountTable = mState.getMountTable(); + for (String mount: possibleMounts) { + if (mountTable.containsKey(mount)) { + // entries in `possibleMounts` are ordered from shortest to longest, so the loop + // ends with the longest matching mount point lastMount = mount; } } @@ -354,6 +356,17 @@ public Map getMountTable() { */ public boolean containsMountPoint(AlluxioURI uri, boolean containsSelf) throws InvalidPathException { + return containsMountPoint(uri, containsSelf, true); + } + + /** + * @param uri the Alluxio uri to check + * @param containsSelf causes the method to return true when the given uri itself is a mount point + * @param cleanPath if the paths should be cleaned + * @return true if the given uri has a descendant which is a mount point [, or is a mount point] + */ + public boolean containsMountPoint(AlluxioURI uri, boolean containsSelf, boolean cleanPath) + throws InvalidPathException { String path = uri.getPath(); try (LockResource r = new LockResource(mReadLock)) { @@ -362,7 +375,7 @@ public boolean containsMountPoint(AlluxioURI uri, boolean containsSelf) if (!containsSelf && mountPath.equals(path)) { continue; } - if (PathUtils.hasPrefix(mountPath, path)) { + if (PathUtils.hasPrefix(mountPath, path, cleanPath)) { return true; } } @@ -690,15 +703,17 @@ public final class State implements Journaled { private final Map mMountTable; /** Map from mount id to cache of paths which have been synced with UFS.
*/ private final UfsSyncPathCache mUfsSyncPathCache; + private final UfsManager mUfsManager; /** * @param mountInfo root mount info * @param clock the clock used for computing sync times */ - State(MountInfo mountInfo, Clock clock) { + State(MountInfo mountInfo, Clock clock, UfsManager ufsManager) { mMountTable = new HashMap<>(10); mMountTable.put(MountTable.ROOT, mountInfo); mUfsSyncPathCache = new UfsSyncPathCache(clock); + mUfsManager = ufsManager; } /** @@ -727,13 +742,19 @@ public void applyAndJournal(Supplier context, DeleteMountPointEn private void applyAddMountPoint(AddMountPointEntry entry) { try (LockResource r = new LockResource(mWriteLock)) { MountInfo mountInfo = fromAddMountPointEntry(entry); + UnderFileSystemConfiguration ufsConf = new UnderFileSystemConfiguration( + Configuration.global(), mountInfo.getOptions().getReadOnly()) + .createMountSpecificConf(mountInfo.getOptions().getPropertiesMap()); mMountTable.put(entry.getAlluxioPath(), mountInfo); + mUfsManager.addMount(mountInfo.getMountId(), mountInfo.getUfsUri(), ufsConf); } } private void applyDeleteMountPoint(DeleteMountPointEntry entry) { try (LockResource r = new LockResource(mWriteLock)) { + long mountId = mMountTable.get(entry.getAlluxioPath()).getMountId(); mMountTable.remove(entry.getAlluxioPath()); + mUfsManager.removeMount(mountId); } } diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeDirectory.java b/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeDirectory.java index 2118fd68b04e..675fa80a8851 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeDirectory.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeDirectory.java @@ -230,7 +230,7 @@ public static MutableInodeDirectory fromJournalEntry(InodeDirectoryEntry entry) */ public static MutableInodeDirectory create(long id, long parentId, String name, CreateDirectoryContext context) { - return new MutableInodeDirectory(id) + MutableInodeDirectory directory = new MutableInodeDirectory(id) .setParentId(parentId) .setName(name) .setTtl(context.getTtl()) @@ -243,6 +243,10 @@ public static MutableInodeDirectory create(long id, long parentId, String name, .setAcl(context.getDefaultAcl()) .setMountPoint(context.isMountPoint()) .setXAttr(context.getXAttr()); + if (context.getFingerprint() != null) { + directory.setUfsFingerprint(context.getFingerprint()); + } + return directory; } @Override diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeFile.java b/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeFile.java index eb8e254b3c0b..82bbea15677e 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeFile.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeFile.java @@ -439,8 +439,8 @@ public static MutableInodeFile create(long blockContainerId, long parentId, Stri CreateFilePOptionsOrBuilder options = context.getOptions(); Preconditions.checkArgument( options.getReplicationMax() == Constants.REPLICATION_MAX_INFINITY - || options.getReplicationMax() >= options.getReplicationMin()); - return new MutableInodeFile(blockContainerId) + || options.getReplicationMax() >= options.getReplicationMin()); + MutableInodeFile inodeFile = new MutableInodeFile(blockContainerId) .setBlockSizeBytes(options.getBlockSizeBytes()) .setCreationTimeMs(creationTimeMs) .setName(name) @@ -462,6 +462,15 @@ public static MutableInodeFile create(long blockContainerId, long 
parentId, Stri ? Constants.NO_AUTO_PERSIST : System.currentTimeMillis() + options.getPersistenceWaitTime()) .setXAttr(context.getXAttr()); + if (context.getFingerprint() != null) { + inodeFile.setUfsFingerprint(context.getFingerprint()); + } + if (context.getCompleteFileInfo() != null) { + inodeFile.setBlockIds(context.getCompleteFileInfo().getBlockIds()); + inodeFile.setCompleted(true); + inodeFile.setLength(context.getCompleteFileInfo().getLength()); + } + return inodeFile; } @Override diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/SimpleInodeLockList.java b/core/server/master/src/main/java/alluxio/master/file/meta/SimpleInodeLockList.java index 4e74e780f450..51f13b2765b1 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/SimpleInodeLockList.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/SimpleInodeLockList.java @@ -88,12 +88,32 @@ public void lockInode(Inode inode, LockMode mode) { Preconditions.checkState(!endsInInode(), "Cannot lock inode %s for lock list %s because the lock list already ends in an inode", inode.getId(), this); - Preconditions.checkState(inode.getName().equals(mLastEdge.getName()), - "Expected to lock inode %s but locked inode %s", mLastEdge.getName(), inode.getName()); + checkInodeNameAndEdgeNameMatch(inode); } lockAndAddInode(inode, mode); } + + /** + * Checks if the inode name and the edge name match. + * @param inode the inode to check + */ + private void checkInodeNameAndEdgeNameMatch(Inode inode) throws IllegalStateException { + if (!inode.getName().equals(mLastEdge.getName())) { + StringBuilder sb = new StringBuilder(); + for (InodeView currentInode : mInodes) { + sb.append("["); + sb.append(currentInode.toProto()); + sb.append("]->"); + } + sb.append("[END]"); + throw new IllegalStateException( + String.format( + "Expected to lock inode %s but locked inode name %s, id: %s, parent_id: %s. %n", + mLastEdge.getName(), inode.getName(), inode.getId(), inode.getParentId()) + + "Locked inode path: " + sb); + } + } + @Override public void lockEdge(Inode lastInode, String childName, LockMode mode) { mode = nextLockMode(mode); diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucket.java b/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucket.java index eda16f7dc6a8..fcb0659d6068 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucket.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucket.java @@ -17,6 +17,8 @@ import com.google.common.base.Objects; import java.util.Collection; +import java.util.Collections; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import javax.annotation.concurrent.ThreadSafe; @@ -34,16 +36,18 @@ public final class TtlBucket implements Comparable { */ private static long sTtlIntervalMs = Configuration.getMs(PropertyKey.MASTER_TTL_CHECKER_INTERVAL_MS); + /** The default number of retry attempts for processing an expired inode. */ + public static final int DEFAULT_RETRY_ATTEMPTS = 5; /** * Each bucket has a time to live interval, this value is the start of the interval, interval * value is the same as the configuration of {@link PropertyKey#MASTER_TTL_CHECKER_INTERVAL_MS}. */ private final long mTtlIntervalStartTimeMs; /** - * A collection of inodes whose ttl value is in the range of this bucket's interval. The mapping - * is from inode id to inode. + * A collection containing those inodes whose ttl value is + * in the range of this bucket's interval. The mapping + * is from inode id to the number of remaining retry attempts for processing it.
*/ - private final ConcurrentHashMap mInodes; + private final ConcurrentHashMap mInodeToRetryMap; /** * Creates a new instance of {@link TtlBucket}. @@ -52,7 +56,7 @@ */ public TtlBucket(long startTimeMs) { mTtlIntervalStartTimeMs = startTimeMs; - mInodes = new ConcurrentHashMap<>(); + mInodeToRetryMap = new ConcurrentHashMap<>(); } /** @@ -78,29 +82,57 @@ public static long getTtlIntervalMs() { } /** - * @return the set of all inodes in the bucket backed by the internal set, changes made to the - * returned set will be shown in the internal set, and vice versa + * @return an unmodifiable view of all inode ids in the bucket */ - public Collection getInodes() { - return mInodes.values(); + public Collection getInodeIds() { + return Collections.unmodifiableSet(mInodeToRetryMap.keySet()); } /** - * Adds a inode to the bucket. + * Gets the mapping from each inode id to its remaining ttl processing retry attempts. + * @return an unmodifiable view of the inode id to remaining retry attempts entries + */ + public Collection> getInodeExpiries() { + return Collections.unmodifiableSet(mInodeToRetryMap.entrySet()); + } + + /** + * Adds an inode with the default number of retry attempts. + * @param inode the inode to be added + */ + public void addInode(Inode inode) { + addInode(inode, DEFAULT_RETRY_ATTEMPTS); + } + + /** + * Adds an inode to the bucket with a specific number of remaining retries. * * @param inode the inode to be added + * @param numOfRetry the number of retries left when the inode is added to the ttl bucket */ - public void addInode(Inode inode) { - mInodes.put(inode.getId(), inode); + public void addInode(Inode inode, int numOfRetry) { + mInodeToRetryMap.compute(inode.getId(), (k, v) -> { + if (v != null) { + return Math.min(v, numOfRetry); + } + return numOfRetry; + }); } /** - * Removes a inode from the bucket. + * Removes an inode from the bucket. * * @param inode the inode to be removed */ public void removeInode(InodeView inode) { - mInodes.remove(inode.getId()); + mInodeToRetryMap.remove(inode.getId()); + } + + /** + * @return the number of inodes in the bucket + */ + public int size() { + return mInodeToRetryMap.size(); } /** diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucketList.java b/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucketList.java index a815117e725a..a160a2d488dc 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucketList.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucketList.java @@ -26,6 +26,7 @@ import java.io.EOFException; import java.io.IOException; import java.io.OutputStream; +import java.util.HashSet; import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentSkipListSet; @@ -59,6 +60,29 @@ public TtlBucketList(ReadOnlyInodeStore inodeStore) { mBucketList = new ConcurrentSkipListSet<>(); } + /** + * Loads the inode with the provided id from the inode store. + * @param inodeId the id of the inode to load + * @return the inode, or null if it is not present in the inode store + */ + public Inode loadInode(long inodeId) { + return mInodeStore.get(inodeId).orElse(null); + } + + /** + * @return the number of TTL buckets + */ + public int getNumBuckets() { + return mBucketList.size(); + } + + /** + * @return the total number of inodes in all the buckets + */ + public long getNumInodes() { + return mBucketList.stream().mapToInt((bucket) -> bucket.size()).sum(); + } + /** * Gets the bucket in the list that contains the inode.
* @@ -88,6 +112,14 @@ private TtlBucket getBucketContaining(InodeView inode) { return bucket; } + /** + * Insert inode to the ttlbucket with default number of retry attempts. + * @param inode + */ + public void insert(Inode inode) { + insert(inode, TtlBucket.DEFAULT_RETRY_ATTEMPTS); + } + /** * Inserts an inode to the appropriate bucket where its ttl end time lies in the * bucket's interval, if no appropriate bucket exists, a new bucket will be created to contain @@ -95,8 +127,9 @@ private TtlBucket getBucketContaining(InodeView inode) { * buckets and nothing will happen. * * @param inode the inode to be inserted + * @param numOfRetry number of retries left to process this inode */ - public void insert(Inode inode) { + public void insert(Inode inode, int numOfRetry) { if (inode.getTtl() == Constants.NO_TTL) { return; } @@ -104,36 +137,40 @@ public void insert(Inode inode) { TtlBucket bucket; while (true) { bucket = getBucketContaining(inode); - if (bucket != null) { - break; + if (bucket == null) { + long ttlEndTimeMs = inode.getCreationTimeMs() + inode.getTtl(); + // No bucket contains the inode, so a new bucket should be added with an appropriate + // interval start. Assume the list of buckets have continuous intervals, and the + // first interval starts at 0, then ttlEndTimeMs should be in number + // (ttlEndTimeMs / interval) interval, so the start time of this interval should be + // (ttlEndTimeMs / interval) * interval. + long interval = TtlBucket.getTtlIntervalMs(); + bucket = new TtlBucket(interval == 0 ? ttlEndTimeMs : ttlEndTimeMs / interval * interval); + if (!mBucketList.add(bucket)) { + // If we reach here, it means the same bucket has been concurrently inserted by another + // thread, try again. + continue; + } } - long ttlEndTimeMs = inode.getCreationTimeMs() + inode.getTtl(); - // No bucket contains the inode, so a new bucket should be added with an appropriate interval - // start. Assume the list of buckets have continuous intervals, and the first interval starts - // at 0, then ttlEndTimeMs should be in number (ttlEndTimeMs / interval) interval, so the - // start time of this interval should be (ttlEndTimeMs / interval) * interval. - long interval = TtlBucket.getTtlIntervalMs(); - bucket = new TtlBucket(interval == 0 ? ttlEndTimeMs : ttlEndTimeMs / interval * interval); - if (mBucketList.add(bucket)) { + bucket.addInode(inode, numOfRetry); + /* if we added to the bucket but it got concurrently polled by InodeTtlChecker, + we're not sure this newly-added inode will be processed by the checker, + so we need to try insert again. */ + if (mBucketList.contains(bucket)) { break; } - // If we reach here, it means the same bucket has been concurrently inserted by another - // thread. } - // TODO(zhouyufa): Consider the concurrent situation that the bucket is expired and processed by - // the InodeTtlChecker, then adding the inode into the bucket is meaningless since the bucket - // will not be accessed again. (c.f. ALLUXIO-2821) - bucket.addInode(inode); } /** - * Removes a inode from the bucket containing it if the inode is in one of the buckets, otherwise, - * do nothing. + * Removes an inode from the bucket containing it if the inode is in one + * of the buckets, otherwise, do nothing. * *

* Assume that no inode in the buckets has ttl value that equals {@link Constants#NO_TTL}. - If a inode with valid ttl value is inserted to the buckets and its ttl value is going to be set - to {@link Constants#NO_TTL} later, be sure to remove the inode from the buckets first. + If an inode with valid ttl value is inserted to the buckets and its ttl value is + going to be set to {@link Constants#NO_TTL} later, be sure to remove the inode + from the buckets first. * * @param inode the inode to be removed */ @@ -145,24 +182,22 @@ public void remove(InodeView inode) { } /** - * Retrieves buckets whose ttl interval has expired before the specified time, that is, the + * Polls buckets whose ttl interval has expired before the specified time, that is, the * bucket's interval start time should be less than or equal to (specified time - ttl interval). - * The returned set is backed by the internal set. - * + * If new inodes are concurrently added to a bucket that has already been polled, the + * inserting thread cannot be sure whether the InodeTtlChecker will process them as part + * of this batch, so it creates a new bucket and re-inserts them there to be retried. + * See {@link TtlBucketList#insert(Inode)}. * @param time the expiration time * @return a set of expired buckets or an empty set if no buckets have expired */ - public Set getExpiredBuckets(long time) { - return mBucketList.headSet(new TtlBucket(time - TtlBucket.getTtlIntervalMs()), true); - } - - /** - * Removes all buckets in the set. - * - * @param buckets a set of buckets to be removed - */ - public void removeBuckets(Set buckets) { - mBucketList.removeAll(buckets); + public Set pollExpiredBuckets(long time) { + Set expiredBuckets = new HashSet<>(); + TtlBucket upperBound = new TtlBucket(time - TtlBucket.getTtlIntervalMs()); + while (!mBucketList.isEmpty() && mBucketList.first().compareTo(upperBound) <= 0) { + expiredBuckets.add(mBucketList.pollFirst()); + } + return expiredBuckets; } @Override @@ -170,12 +205,17 @@ public CheckpointName getCheckpointName() { return CheckpointName.TTL_BUCKET_LIST; } + /* + Checkpoints a snapshot of the inodes currently in the ttl bucket list. It is fine if the + checkpoint includes some inodes that have already been processed, as the expiry of each + inode is double-checked by the InodeTtlChecker at processing time.
+ */ @Override public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { CheckpointOutputStream cos = new CheckpointOutputStream(output, CheckpointType.LONGS); for (TtlBucket bucket : mBucketList) { - for (Inode inode : bucket.getInodes()) { - cos.writeLong(inode.getId()); + for (long inodeId : bucket.getInodeIds()) { + cos.writeLong(inodeId); } } } diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncPathCache.java b/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncPathCache.java index a76bb7f8dfe1..f236abe43693 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncPathCache.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncPathCache.java @@ -90,7 +90,8 @@ public UfsSyncPathCache(Clock clock) { @VisibleForTesting UfsSyncPathCache(Clock clock, @Nullable BiConsumer onRemoval) { mClock = Preconditions.checkNotNull(clock); - mItems = CacheBuilder.newBuilder() + mItems = CacheBuilder.newBuilder().concurrencyLevel( + Configuration.getInt(PropertyKey.MASTER_UFS_PATH_CACHE_THREADS)) .removalListener( (removal) -> { if (removal.wasEvicted() && removal.getKey() != null && removal.getValue() != null) { @@ -319,8 +320,9 @@ private void updateParentInvalidation(SyncState state, long time, long parentLev * @param syncTime the time to set the sync success to, if null then the current * clock time is used * @param isFile true if the synced path is a file + * @return the sync state */ - public void notifySyncedPath( + public SyncState notifySyncedPath( AlluxioURI path, DescendantType descendantType, long startTime, @Nullable Long syncTime, boolean isFile) { long time = syncTime == null ? startTime : @@ -329,9 +331,10 @@ public void notifySyncedPath( try (LockResource ignored = new LockResource(mRootLock)) { Preconditions.checkState(!isFile); updateSyncState(mRoot, time, startTime, false, descendantType); + return mRoot; } } else { - mItems.asMap().compute(path.getPath(), (key, state) -> { + return mItems.asMap().compute(path.getPath(), (key, state) -> { if (state == null) { state = new SyncState(isFile); } diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncUtils.java b/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncUtils.java index 0babb5c8d0c9..2e5d29362ea6 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncUtils.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncUtils.java @@ -13,6 +13,8 @@ import alluxio.underfs.Fingerprint; +import com.google.common.base.Preconditions; + import javax.annotation.concurrent.NotThreadSafe; /** @@ -37,6 +39,7 @@ public static SyncPlan computeSyncPlan(Inode inode, Fingerprint ufsFingerprint, // issues#15211: If Inodes store a Fingerprint proto instead of Strings, // we would save many String parsings here. 
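(Both caches touched by this patch now set a Guava concurrencyLevel from MASTER_UFS_PATH_CACHE_THREADS. A standalone sketch of that construction; the key/value types and tuning values here are invented for illustration.)

    // Bound the cache, split its internal segments across ~64 concurrent
    // writers, and record hit/miss statistics.
    Cache<String, Long> cache = CacheBuilder.newBuilder()
        .maximumSize(100_000)
        .concurrencyLevel(64)
        .recordStats()
        .build();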
Fingerprint inodeFingerprint = Fingerprint.parse(inode.getUfsFingerprint()); + Preconditions.checkState(inodeFingerprint != null, "Got invalid UFS fingerprint"); boolean isContentSynced = inodeUfsIsContentSynced(inode, inodeFingerprint, ufsFingerprint); boolean isMetadataSynced = inodeUfsIsMetadataSynced(inode, inodeFingerprint, ufsFingerprint); boolean ufsExists = ufsFingerprint.isValid(); diff --git a/core/server/master/src/main/java/alluxio/master/file/replication/ReplicationChecker.java b/core/server/master/src/main/java/alluxio/master/file/replication/ReplicationChecker.java index 44e801dc29d9..d669f182bbdb 100644 --- a/core/server/master/src/main/java/alluxio/master/file/replication/ReplicationChecker.java +++ b/core/server/master/src/main/java/alluxio/master/file/replication/ReplicationChecker.java @@ -148,7 +148,7 @@ private boolean shouldRun() { * (2) Is there any blocks over replicated, schedule evict jobs to reduce the replication level. */ @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { if (!shouldRun()) { return; } diff --git a/core/server/master/src/main/java/alluxio/master/job/AbstractJob.java b/core/server/master/src/main/java/alluxio/master/job/AbstractJob.java new file mode 100644 index 000000000000..eb3e31bff832 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/AbstractJob.java @@ -0,0 +1,109 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.job; + +import static java.util.Objects.requireNonNull; + +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobState; +import alluxio.scheduler.job.Task; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Optional; +import java.util.OptionalLong; + +/** + * Abstract class for job. It provides basic job information and state management. + * + * @param the type of the task of the job + */ +public abstract class AbstractJob> implements Job { + private static final Logger LOG = LoggerFactory.getLogger(AbstractJob.class); + protected final String mJobId; + protected JobState mState; + protected OptionalLong mEndTime = OptionalLong.empty(); + protected final long mStartTime; + protected final Optional mUser; + + /** + * Creates a new instance of {@link AbstractJob}. + * + * @param user the user who submitted the job + * @param jobId the job id + */ + public AbstractJob(Optional user, String jobId) { + mUser = requireNonNull(user, "user is null"); + mJobId = requireNonNull(jobId, "jobId is null"); + mState = JobState.RUNNING; + mStartTime = System.currentTimeMillis(); + } + + @Override + public String getJobId() { + return mJobId; + } + + /** + * Get end time. + * + * @return end time + */ + @Override + public OptionalLong getEndTime() { + return mEndTime; + } + + /** + * Update end time. + * + * @param time time in ms + */ + public void setEndTime(long time) { + mEndTime = OptionalLong.of(time); + } + + /** + * Gets the job state.
+ * + * @return the job's state + */ + @Override + public JobState getJobState() { + return mState; + } + + /** + * Sets the job state. + * + * @param state the new state + */ + @Override + public void setJobState(JobState state) { + LOG.debug("Change JobState to {} for job {}", state, this); + mState = state; + if (!isRunning()) { + mEndTime = OptionalLong.of(System.currentTimeMillis()); + } + } + + @Override + public boolean isRunning() { + return mState == JobState.RUNNING || mState == JobState.VERIFYING; + } + + @Override + public boolean isDone() { + return mState == JobState.SUCCEEDED || mState == JobState.FAILED; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/job/FileIterable.java b/core/server/master/src/main/java/alluxio/master/job/FileIterable.java new file mode 100644 index 000000000000..1a5db1b384c2 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/FileIterable.java @@ -0,0 +1,214 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.job; + +import static java.util.Objects.requireNonNull; + +import alluxio.AlluxioURI; +import alluxio.exception.AccessControlException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.exception.runtime.UnauthenticatedRuntimeException; +import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.ListStatusPartialPOptions; +import alluxio.master.file.FileSystemMaster; +import alluxio.master.file.contexts.CheckAccessContext; +import alluxio.master.file.contexts.ListStatusContext; +import alluxio.security.authentication.AuthenticatedClientUser; +import alluxio.wire.BlockInfo; +import alluxio.wire.FileBlockInfo; +import alluxio.wire.FileInfo; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Predicate; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +/** + * Iterable for {@link FileInfo} objects. Generates the list of files from file system master. + */ +public class FileIterable implements Iterable { + private final FileSystemMaster mFileSystemMaster; + private final String mPath; + private final Optional mUser; + private final boolean mUsePartialListing; + private final Predicate mFilter; + + /** + * Creates a new instance of {@link FileIterable}.
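(A compact illustration of the lifecycle implied by the AbstractJob methods above; job is any concrete subclass instance, and the assertions are for exposition only.)

    // RUNNING and VERIFYING count as running; SUCCEEDED and FAILED count as
    // done, and setJobState stamps the end time once the job stops running.
    job.setJobState(JobState.VERIFYING);
    assert job.isRunning() && !job.isDone();
    job.setJobState(JobState.SUCCEEDED);
    assert job.isDone() && job.getEndTime().isPresent();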
+ * + * @param fileSystemMaster file system master + * @param path path to list + * @param user user to list as + * @param usePartialListing whether to use partial listing + * @param filter filter to apply to the file infos + */ + public FileIterable(FileSystemMaster fileSystemMaster, String path, Optional user, + boolean usePartialListing, Predicate filter) { + mFileSystemMaster = requireNonNull(fileSystemMaster, "fileSystemMaster is null"); + mPath = requireNonNull(path, "path is null"); + mUser = requireNonNull(user, "user is null"); + mUsePartialListing = usePartialListing; + mFilter = filter; + } + + /** + * @return file iterator. generate new iterator each time + */ + public FileIterator iterator() { + return new FileIterator(mFileSystemMaster, mPath, mUser, mUsePartialListing, mFilter); + } + + /** + * An iterator over {@link FileInfo} objects. + */ + public class FileIterator implements Iterator { + private final ListStatusPOptions.Builder mListOptions = ListStatusPOptions + .newBuilder() + .setRecursive(true); + private static final int PARTIAL_LISTING_BATCH_SIZE = 100; + private final FileSystemMaster mFileSystemMaster; + private final String mPath; + private final Optional mUser; + private final boolean mUsePartialListing; + private final Predicate mFilter; + private String mStartAfter = ""; + private List mFiles; + private Iterator mFileInfoIterator; + private final AtomicLong mTotalFileCount = new AtomicLong(); + private final AtomicLong mTotalByteCount = new AtomicLong(); + + /** + * Creates a new instance of {@link FileIterator}. + * + * @param fileSystemMaster file system master + * @param path path to list + * @param user user to list as + * @param usePartialListing whether to use partial listing + * @param filter filter to apply to the file infos + */ + public FileIterator(FileSystemMaster fileSystemMaster, String path, Optional user, + boolean usePartialListing, Predicate filter) { + mFileSystemMaster = requireNonNull(fileSystemMaster, "fileSystemMaster is null"); + mPath = requireNonNull(path, "path is null"); + mUser = requireNonNull(user, "user is null"); + mUsePartialListing = usePartialListing; + mFilter = filter; + checkAccess(); + if (usePartialListing) { + partialListFileInfos(); + } + else { + listFileInfos(ListStatusContext.create(mListOptions)); + } + } + + private void checkAccess() { + AuthenticatedClientUser.set(mUser.orElse(null)); + try { + mFileSystemMaster.checkAccess(new AlluxioURI(mPath), CheckAccessContext.defaults()); + } catch (FileDoesNotExistException | InvalidPathException e) { + throw new NotFoundRuntimeException(e); + } catch (AccessControlException e) { + throw new UnauthenticatedRuntimeException(e); + } catch (IOException e) { + throw AlluxioRuntimeException.from(e); + } + } + + @Override + public boolean hasNext() { + if (mUsePartialListing && !mFileInfoIterator.hasNext()) { + partialListFileInfos(); + } + return mFileInfoIterator.hasNext(); + } + + @Override + public FileInfo next() { + if (mUsePartialListing && !mFileInfoIterator.hasNext()) { + partialListFileInfos(); + } + return mFileInfoIterator.next(); + } + + private void partialListFileInfos() { + if (!mStartAfter.isEmpty()) { + mListOptions.setDisableAreDescendantsLoadedCheck(true); + } + Supplier context = () -> { + return ListStatusContext.create(ListStatusPartialPOptions + .newBuilder() + .setOptions(mListOptions) + .setBatchSize(PARTIAL_LISTING_BATCH_SIZE) + .setStartAfter(mStartAfter)); + }; + + List fileInfos; + while ((fileInfos = listStatus(context.get())) != null + && 
(mFiles = fileInfos.stream().filter(mFilter).collect(Collectors.toList())).isEmpty() + && !fileInfos.isEmpty()) { + mStartAfter = fileInfos.get(fileInfos.size() - 1).getPath(); + mListOptions.setDisableAreDescendantsLoadedCheck(true); + } + if (mFiles.size() > 0) { + mStartAfter = mFiles + .get(mFiles.size() - 1) + .getPath(); + } + updateIterator(); + } + + private void listFileInfos(ListStatusContext context) { + mFiles = listStatus(context).stream().filter(mFilter).collect(Collectors.toList()); + updateIterator(); + } + + private List listStatus(ListStatusContext context) { + try { + AuthenticatedClientUser.set(mUser.orElse(null)); + return mFileSystemMaster.listStatus(new AlluxioURI(mPath), context); + } catch (FileDoesNotExistException | InvalidPathException e) { + throw new NotFoundRuntimeException(e); + } catch (AccessControlException e) { + throw new UnauthenticatedRuntimeException(e); + } catch (IOException e) { + throw AlluxioRuntimeException.from(e); + } finally { + AuthenticatedClientUser.remove(); + } + } + + private void updateIterator() { + mFileInfoIterator = mFiles.iterator(); + mTotalFileCount.set(mFiles.size()); + mTotalByteCount.set(mFiles + .stream() + .map(FileInfo::getFileBlockInfos) + .flatMap(Collection::stream) + .map(FileBlockInfo::getBlockInfo) + .filter(blockInfo -> blockInfo + .getLocations() + .isEmpty()) + .map(BlockInfo::getLength) + .reduce(Long::sum) + .orElse(0L)); + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java b/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java new file mode 100644 index 000000000000..2146097f07fa --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java @@ -0,0 +1,51 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.job; + +import alluxio.job.JobRequest; +import alluxio.job.LoadJobRequest; +import alluxio.master.file.FileSystemMaster; +import alluxio.proto.journal.Journal; +import alluxio.scheduler.job.JobFactory; + +/** + * Producer for {@link JobFactory}. 
+ */
+public class JobFactoryProducer {
+  private JobFactoryProducer() {} // prevent instantiation
+
+  /**
+   * @param request the job request
+   * @param fsMaster the file system master
+   * @return the job factory
+   */
+  public static JobFactory create(JobRequest request, FileSystemMaster fsMaster) {
+    if (request instanceof LoadJobRequest) {
+      return new LoadJobFactory((LoadJobRequest) request, fsMaster);
+    }
+    throw new IllegalArgumentException("Unknown job type: " + request.getType());
+  }
+
+  /**
+   * @param entry the job journal entry
+   * @param fsMaster the file system master
+   * @return the job factory
+   */
+  public static JobFactory create(Journal.JournalEntry entry, FileSystemMaster fsMaster) {
+    if (entry.hasLoadJob()) {
+      return new JournalLoadJobFactory(entry.getLoadJob(), fsMaster);
+    } else {
+      throw new IllegalArgumentException("Unknown job type: " + entry);
+    }
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/job/JournalLoadJobFactory.java b/core/server/master/src/main/java/alluxio/master/job/JournalLoadJobFactory.java
new file mode 100644
index 000000000000..8a45e13e60df
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/job/JournalLoadJobFactory.java
@@ -0,0 +1,59 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.job;
+
+import alluxio.master.file.FileSystemMaster;
+import alluxio.scheduler.job.Job;
+import alluxio.scheduler.job.JobFactory;
+import alluxio.scheduler.job.JobState;
+
+import java.util.Optional;
+import java.util.OptionalLong;
+
+/**
+ * Factory for creating {@link LoadJob}s from journal entries.
+ */
+public class JournalLoadJobFactory implements JobFactory {
+
+  private final FileSystemMaster mFsMaster;
+
+  private final alluxio.proto.journal.Job.LoadJobEntry mJobEntry;
+
+  /**
+   * Create factory.
+   * @param journalEntry journal entry
+   * @param fsMaster file system master
+   */
+  public JournalLoadJobFactory(alluxio.proto.journal.Job.LoadJobEntry journalEntry,
+      FileSystemMaster fsMaster) {
+    mFsMaster = fsMaster;
+    mJobEntry = journalEntry;
+  }
+
+  @Override
+  public Job<?> create() {
+    Optional<String> user =
+        mJobEntry.hasUser() ? Optional.of(mJobEntry.getUser()) : Optional.empty();
+    FileIterable fileIterator =
+        new FileIterable(mFsMaster, mJobEntry.getLoadPath(), user, mJobEntry.getPartialListing(),
+            LoadJob.QUALIFIED_FILE_FILTER);
+    LoadJob job = new LoadJob(mJobEntry.getLoadPath(), user, mJobEntry.getJobId(),
+        mJobEntry.hasBandwidth() ?
OptionalLong.of(mJobEntry.getBandwidth()) : OptionalLong.empty(), + mJobEntry.getPartialListing(), mJobEntry.getVerify(), fileIterator); + job.setJobState(JobState.fromProto(mJobEntry.getState())); + if (mJobEntry.hasEndTime()) { + job.setEndTime(mJobEntry.getEndTime()); + } + return job; + } +} + diff --git a/core/server/master/src/main/java/alluxio/master/job/LoadJob.java b/core/server/master/src/main/java/alluxio/master/job/LoadJob.java new file mode 100644 index 000000000000..36f2b277939e --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/LoadJob.java @@ -0,0 +1,689 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.job; + +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +import alluxio.client.block.stream.BlockWorkerClient; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.exception.runtime.InvalidArgumentRuntimeException; +import alluxio.grpc.Block; +import alluxio.grpc.BlockStatus; +import alluxio.grpc.JobProgressReportFormat; +import alluxio.grpc.LoadRequest; +import alluxio.grpc.LoadResponse; +import alluxio.grpc.TaskStatus; +import alluxio.grpc.UfsReadOptions; +import alluxio.job.JobDescription; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.proto.journal.Journal; +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobState; +import alluxio.scheduler.job.Task; +import alluxio.util.FormatUtils; +import alluxio.wire.BlockInfo; +import alluxio.wire.FileInfo; +import alluxio.wire.WorkerInfo; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Meter; +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.PropertyAccessor; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.MoreObjects; +import com.google.common.base.Objects; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.ListenableFuture; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.UUID; +import java.util.concurrent.CancellationException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Predicate; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * Load job that loads a file or a directory into Alluxio. 
+ * This class should only be manipulated from the scheduler thread in Scheduler,
+ * thus the state-changing functions are not thread safe.
+ */
+@NotThreadSafe
+public class LoadJob extends AbstractJob<LoadJob.LoadTask> {
+  private static final Logger LOG = LoggerFactory.getLogger(LoadJob.class);
+  public static final String TYPE = "load";
+  private static final double FAILURE_RATIO_THRESHOLD = 0.05;
+  private static final int FAILURE_COUNT_THRESHOLD = 100;
+  private static final int RETRY_BLOCK_CAPACITY = 1000;
+  private static final double RETRY_THRESHOLD = 0.8 * RETRY_BLOCK_CAPACITY;
+  private static final int BATCH_SIZE = Configuration.getInt(PropertyKey.JOB_BATCH_SIZE);
+  public static final Predicate<FileInfo> QUALIFIED_FILE_FILTER =
+      (fileInfo) -> !fileInfo.isFolder() && fileInfo.isCompleted() && fileInfo.isPersisted()
+          && fileInfo.getInAlluxioPercentage() != 100;
+  // Job configurations
+  private final String mPath;
+
+  private OptionalLong mBandwidth;
+  private boolean mUsePartialListing;
+  private boolean mVerificationEnabled;
+
+  // Job states
+  private final LinkedList<Block> mRetryBlocks = new LinkedList<>();
+  private final Map<String, String> mFailedFiles = new HashMap<>();
+
+  private final AtomicLong mProcessedFileCount = new AtomicLong();
+  private final AtomicLong mLoadedByteCount = new AtomicLong();
+  private final AtomicLong mTotalByteCount = new AtomicLong();
+  private final AtomicLong mTotalBlockCount = new AtomicLong();
+  private final AtomicLong mCurrentBlockCount = new AtomicLong();
+  private final AtomicLong mTotalFailureCount = new AtomicLong();
+  private final AtomicLong mCurrentFailureCount = new AtomicLong();
+  private Optional<AlluxioRuntimeException> mFailedReason = Optional.empty();
+  private final Iterable<FileInfo> mFileIterable;
+  private Optional<Iterator<FileInfo>> mFileIterator = Optional.empty();
+  private FileInfo mCurrentFile;
+  private Iterator<Long> mBlockIterator = Collections.emptyIterator();
+
+  /**
+   * Constructor.
+   * @param path file path
+   * @param user user for authentication
+   * @param bandwidth bandwidth
+   * @param fileIterator file iterator
+   */
+  @VisibleForTesting
+  public LoadJob(String path, String user, OptionalLong bandwidth,
+      FileIterable fileIterator) {
+    this(path, Optional.of(user), UUID.randomUUID().toString(), bandwidth, false, false,
+        fileIterator);
+  }
+
+  /**
+   * Constructor.
+   *
+   * @param path file path
+   * @param user user for authentication
+   * @param jobId job identifier
+   * @param bandwidth bandwidth
+   * @param usePartialListing whether to use partial listing
+   * @param verificationEnabled whether to verify the job after loaded
+   * @param fileIterable file iterable
+   */
+  public LoadJob(
+      String path,
+      Optional<String> user, String jobId, OptionalLong bandwidth,
+      boolean usePartialListing,
+      boolean verificationEnabled, FileIterable fileIterable) {
+    super(user, jobId);
+    mPath = requireNonNull(path, "path is null");
+    Preconditions.checkArgument(
+        !bandwidth.isPresent() || bandwidth.getAsLong() > 0,
+        format("bandwidth should be greater than 0 if provided, get %s", bandwidth));
+    mBandwidth = bandwidth;
+    mUsePartialListing = usePartialListing;
+    mVerificationEnabled = verificationEnabled;
+    mFileIterable = fileIterable;
+  }
+
+  /**
+   * Get load file path.
+   * @return file path
+   */
+  public String getPath() {
+    return mPath;
+  }
+
+  @Override
+  public JobDescription getDescription() {
+    return JobDescription.newBuilder().setPath(mPath).setType(TYPE).build();
+  }
+
+  /**
+   * Get bandwidth.
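+   * <p>Since the value is an {@link OptionalLong}, callers branch on presence; a small
+   * illustrative sketch ({@code job} is a hypothetical instance), mirroring what the
+   * progress report below does:
+   * <pre>{@code
+   * String limit = job.getBandwidth().isPresent()
+   *     ? FormatUtils.getSizeFromBytes(job.getBandwidth().getAsLong())
+   *     : "unlimited";
+   * }</pre>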
+ * @return the allocated bandwidth + */ + public OptionalLong getBandwidth() { + return mBandwidth; + } + + /** + * Update bandwidth. + * @param bandwidth new bandwidth + */ + public void updateBandwidth(OptionalLong bandwidth) { + mBandwidth = bandwidth; + } + + /** + * Is verification enabled. + * @return whether verification is enabled + */ + public boolean isVerificationEnabled() { + return mVerificationEnabled; + } + + /** + * Enable verification. + * @param enableVerification whether to enable verification + */ + public void setVerificationEnabled(boolean enableVerification) { + mVerificationEnabled = enableVerification; + } + + /** + * Set load state to FAILED with given reason. + * @param reason failure exception + */ + @Override + public void failJob(AlluxioRuntimeException reason) { + setJobState(JobState.FAILED); + mFailedReason = Optional.of(reason); + JOB_LOAD_FAIL.inc(); + } + + @Override + public void setJobSuccess() { + setJobState(JobState.SUCCEEDED); + JOB_LOAD_SUCCESS.inc(); + } + + /** + * Add bytes to total loaded bytes. + * @param bytes bytes to be added to total + */ + @VisibleForTesting + public void addLoadedBytes(long bytes) { + mLoadedByteCount.addAndGet(bytes); + } + + @Override + public String getProgress(JobProgressReportFormat format, boolean verbose) { + return (new LoadProgressReport(this, verbose)).getReport(format); + } + + /** + * Get the processed block count in the current loading pass. + * @return current block count + */ + public long getCurrentBlockCount() { + return mCurrentBlockCount.get(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LoadJob that = (LoadJob) o; + return Objects.equal(getDescription(), that.getDescription()); + } + + @Override + public int hashCode() { + return Objects.hashCode(getDescription()); + } + + @Override + public boolean isHealthy() { + long currentFailureCount = mCurrentFailureCount.get(); + return mState != JobState.FAILED + && currentFailureCount <= FAILURE_COUNT_THRESHOLD + || (double) currentFailureCount / mCurrentBlockCount.get() <= FAILURE_RATIO_THRESHOLD; + } + + @Override + public boolean isCurrentPassDone() { + return mFileIterator.isPresent() && !mFileIterator.get().hasNext() && !mBlockIterator.hasNext() + && mRetryBlocks.isEmpty(); + } + + @Override + public void initiateVerification() { + Preconditions.checkState(isCurrentPassDone(), "Previous pass is not finished"); + mFileIterator = Optional.empty(); + mTotalBlockCount.addAndGet(mCurrentBlockCount.get()); + mTotalFailureCount.addAndGet(mCurrentFailureCount.get()); + mCurrentBlockCount.set(0); + mCurrentFailureCount.set(0); + mState = JobState.VERIFYING; + } + + /** + * get next load task. + * + * @param worker blocker to worker + * @return the next task to run. If there is no task to run, return empty + */ + @Override + public Optional getNextTask(WorkerInfo worker) { + List blocks = getNextBatchBlocks(BATCH_SIZE); + if (blocks.isEmpty()) { + return Optional.empty(); + } + return Optional.of(new LoadTask(blocks)); + } + + /** + * Get next batch of blocks. 
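+   * <p>Illustrative drain loop (a sketch only; {@code job} is assumed, and the method
+   * is test-oriented per {@code @VisibleForTesting}):
+   * <pre>{@code
+   * List<Block> batch = job.getNextBatchBlocks(100);
+   * while (!batch.isEmpty()) {
+   *   // hand the batch to a worker here
+   *   batch = job.getNextBatchBlocks(100);
+   * }
+   * }</pre>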
+ * @param count number of blocks + * @return list of blocks + */ + @VisibleForTesting + public List getNextBatchBlocks(int count) { + if (!mFileIterator.isPresent()) { + mFileIterator = Optional.of(mFileIterable.iterator()); + if (!mFileIterator + .get() + .hasNext()) { + return ImmutableList.of(); + } + mCurrentFile = mFileIterator.get().next(); + if (!mFailedFiles.containsKey(mCurrentFile.getPath())) { + mProcessedFileCount.incrementAndGet(); + } + + mBlockIterator = mCurrentFile.getBlockIds().listIterator(); + } + ImmutableList.Builder batchBuilder = ImmutableList.builder(); + int i = 0; + // retry failed blocks if there's too many failed blocks otherwise wait until no more new block + if (mRetryBlocks.size() > RETRY_THRESHOLD + || (!mFileIterator.get().hasNext() && !mBlockIterator.hasNext())) { + while (i < count && !mRetryBlocks.isEmpty()) { + batchBuilder.add(requireNonNull(mRetryBlocks.removeFirst())); + i++; + } + } + for (; i < count; i++) { + if (!mBlockIterator.hasNext()) { + if (!mFileIterator.get().hasNext()) { + return batchBuilder.build(); + } + mCurrentFile = mFileIterator.get().next(); + if (!mFailedFiles.containsKey(mCurrentFile.getPath())) { + mProcessedFileCount.incrementAndGet(); + } + mBlockIterator = mCurrentFile.getBlockIds().listIterator(); + } + long blockId = mBlockIterator.next(); + BlockInfo blockInfo = mCurrentFile.getFileBlockInfo(blockId).getBlockInfo(); + if (blockInfo.getLocations().isEmpty()) { + batchBuilder.add(buildBlock(mCurrentFile, blockId)); + mCurrentBlockCount.incrementAndGet(); + // would be inaccurate when we initial verification, and we retry un-retryable blocks + mTotalByteCount.addAndGet(blockInfo.getLength()); + } + } + return batchBuilder.build(); + } + + /** + * Add a block to retry later. + * @param block the block that failed to load thus needing retry + * @return whether the block is successfully added + */ + @VisibleForTesting + public boolean addBlockToRetry(Block block) { + if (mRetryBlocks.size() >= RETRY_BLOCK_CAPACITY) { + return false; + } + LOG.debug("Retry block {}", block); + mRetryBlocks.add(block); + mCurrentFailureCount.incrementAndGet(); + JOB_LOAD_BLOCK_FAIL.inc(); + return true; + } + + /** + * Add a block to failure summary. + * + * @param block the block that failed to load and cannot be retried + * @param message failure message + * @param code status code for exception + */ + @VisibleForTesting + public void addBlockFailure(Block block, String message, int code) { + // When multiple blocks of the same file failed to load, from user's perspective, + // it's not hugely important what are the reasons for each specific failure, + // if they are different, so we will just keep the first one. 
+ mFailedFiles.put(block.getUfsPath(), + format("Status code: %s, message: %s", code, message)); + mCurrentFailureCount.incrementAndGet(); + JOB_LOAD_BLOCK_FAIL.inc(); + } + + private static Block buildBlock(FileInfo fileInfo, long blockId) { + return Block.newBuilder().setBlockId(blockId) + .setLength(fileInfo.getFileBlockInfo(blockId).getBlockInfo().getLength()) + .setUfsPath(fileInfo.getUfsPath()) + .setMountId(fileInfo.getMountId()) + .setOffsetInFile(fileInfo.getFileBlockInfo(blockId).getOffset()) + .build(); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("Path", mPath) + .add("User", mUser) + .add("Bandwidth", mBandwidth) + .add("UsePartialListing", mUsePartialListing) + .add("VerificationEnabled", mVerificationEnabled) + .add("RetryBlocks", mRetryBlocks) + .add("FailedFiles", mFailedFiles) + .add("StartTime", mStartTime) + .add("ProcessedFileCount", mProcessedFileCount) + .add("LoadedByteCount", mLoadedByteCount) + .add("TotalBlockCount", mTotalBlockCount) + .add("CurrentBlockCount", mCurrentBlockCount) + .add("TotalFailureCount", mTotalFailureCount) + .add("CurrentFailureCount", mCurrentFailureCount) + .add("State", mState) + .add("BatchSize", BATCH_SIZE) + .add("FailedReason", mFailedReason) + .add("FileIterator", mFileIterator) + .add("CurrentFile", mCurrentFile) + .add("BlockIterator", mBlockIterator) + .add("EndTime", mEndTime) + .toString(); + } + + @Override + public Journal.JournalEntry toJournalEntry() { + alluxio.proto.journal.Job.LoadJobEntry.Builder jobEntry = alluxio.proto.journal.Job.LoadJobEntry + .newBuilder() + .setLoadPath(mPath) + .setState(JobState.toProto(mState)) + .setPartialListing(mUsePartialListing) + .setVerify(mVerificationEnabled) + .setJobId(mJobId); + mUser.ifPresent(jobEntry::setUser); + mBandwidth.ifPresent(jobEntry::setBandwidth); + mEndTime.ifPresent(jobEntry::setEndTime); + return Journal.JournalEntry + .newBuilder() + .setLoadJob(jobEntry.build()) + .build(); + } + + /** + * Get duration in seconds. 
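+   * <p>For orientation, the progress report below derives throughput from this value;
+   * a sketch ({@code job} and {@code loadedBytes} are assumed):
+   * <pre>{@code
+   * long duration = job.getDurationInSec();
+   * Long throughput = duration > 0 ? loadedBytes / duration : null; // bytes per second
+   * }</pre>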
+ * @return job duration in seconds + */ + @VisibleForTesting + public long getDurationInSec() { + return (mEndTime.orElse(System.currentTimeMillis()) - mStartTime) / 1000; + } + + @Override + public boolean processResponse(LoadTask loadTask) { + try { + long totalBytes = loadTask.getBlocks().stream() + .map(Block::getLength) + .reduce(Long::sum) + .orElse(0L); + LoadResponse response = loadTask.getResponseFuture().get(); + if (response.getStatus() != TaskStatus.SUCCESS) { + LOG.debug(format("Get failure from worker: %s", response.getBlockStatusList())); + for (BlockStatus status : response.getBlockStatusList()) { + totalBytes -= status.getBlock().getLength(); + if (!isHealthy() || !status.getRetryable() || !addBlockToRetry( + status.getBlock())) { + addBlockFailure(status.getBlock(), status.getMessage(), status.getCode()); + } + } + } + addLoadedBytes(totalBytes); + JOB_LOAD_BLOCK_COUNT.inc( + loadTask.getBlocks().size() - response.getBlockStatusCount()); + JOB_LOAD_BLOCK_SIZE.inc(totalBytes); + JOB_LOAD_RATE.mark(totalBytes); + return response.getStatus() != TaskStatus.FAILURE; + } + catch (ExecutionException e) { + LOG.warn("exception when trying to get load response.", e.getCause()); + for (Block block : loadTask.getBlocks()) { + if (isHealthy()) { + addBlockToRetry(block); + } + else { + AlluxioRuntimeException exception = AlluxioRuntimeException.from(e.getCause()); + addBlockFailure(block, exception.getMessage(), exception.getStatus().getCode() + .value()); + } + } + return false; + } + catch (CancellationException e) { + LOG.warn("Task get canceled and will retry.", e); + loadTask.getBlocks().forEach(this::addBlockToRetry); + return true; + } + catch (InterruptedException e) { + loadTask.getBlocks().forEach(this::addBlockToRetry); + Thread.currentThread().interrupt(); + // We don't count InterruptedException as task failure + return true; + } + } + + @Override + public void updateJob(Job job) { + if (!(job instanceof LoadJob)) { + throw new IllegalArgumentException("Job is not a LoadJob: " + job); + } + LoadJob targetJob = (LoadJob) job; + updateBandwidth(targetJob.getBandwidth()); + setVerificationEnabled(targetJob.isVerificationEnabled()); + } + + /** + * Is verification enabled. + * + * @return whether verification is enabled + */ + @Override + public boolean needVerification() { + return mVerificationEnabled && mCurrentBlockCount.get() > 0; + } + + /** + * Loads blocks in a UFS through an Alluxio worker. + */ + public class LoadTask extends Task { + + /** + * @return blocks to load + */ + public List getBlocks() { + return mBlocks; + } + + private final List mBlocks; + + /** + * Creates a new instance of {@link LoadTask}. 
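+   * <p>Sketch of the intended flow rather than a definitive call sequence;
+   * {@code blocks} and {@code workerClient} are assumed, and the task is created
+   * from within the enclosing job:
+   * <pre>{@code
+   * LoadTask task = new LoadTask(blocks);
+   * ListenableFuture<LoadResponse> future = task.run(workerClient);
+   * }</pre>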
+ * + * @param blocks blocks to load + */ + public LoadTask(List blocks) { + mBlocks = blocks; + } + + @Override + public ListenableFuture run(BlockWorkerClient workerClient) { + LoadRequest.Builder request1 = LoadRequest + .newBuilder() + .addAllBlocks(mBlocks); + UfsReadOptions.Builder options = UfsReadOptions + .newBuilder() + .setTag(mJobId) + .setPositionShort(false); + if (mBandwidth.isPresent()) { + options.setBandwidth(mBandwidth.getAsLong()); + } + mUser.ifPresent(options::setUser); + LoadRequest request = request1 + .setOptions(options.build()) + .build(); + return workerClient.load(request); + } + } + + private static class LoadProgressReport { + private final boolean mVerbose; + private final JobState mJobState; + private final Long mBandwidth; + private final boolean mVerificationEnabled; + private final long mProcessedFileCount; + private final long mLoadedByteCount; + private final Long mTotalByteCount; + private final Long mThroughput; + private final double mFailurePercentage; + private final AlluxioRuntimeException mFailureReason; + private final long mFailedFileCount; + private final Map mFailedFilesWithReasons; + + public LoadProgressReport(LoadJob job, boolean verbose) + { + mVerbose = verbose; + mJobState = job.mState; + mBandwidth = job.mBandwidth.isPresent() ? job.mBandwidth.getAsLong() : null; + mVerificationEnabled = job.mVerificationEnabled; + mProcessedFileCount = job.mProcessedFileCount.get(); + mLoadedByteCount = job.mLoadedByteCount.get(); + if (!job.mUsePartialListing && job.mFileIterator.isPresent()) { + mTotalByteCount = job.mTotalByteCount.get(); + } + else { + mTotalByteCount = null; + } + long duration = job.getDurationInSec(); + if (duration > 0) { + mThroughput = job.mLoadedByteCount.get() / duration; + } + else { + mThroughput = null; + } + long blockCount = job.mTotalBlockCount.get() + job.mCurrentBlockCount.get(); + if (blockCount > 0) { + mFailurePercentage = + ((double) (job.mTotalFailureCount.get() + job.mCurrentFailureCount.get()) / blockCount) + * 100; + } + else { + mFailurePercentage = 0; + } + mFailureReason = job.mFailedReason.orElse(null); + mFailedFileCount = job.mFailedFiles.size(); + if (verbose && mFailedFileCount > 0) { + mFailedFilesWithReasons = job.mFailedFiles; + } else { + mFailedFilesWithReasons = null; + } + } + + public String getReport(JobProgressReportFormat format) + { + switch (format) { + case TEXT: + return getTextReport(); + case JSON: + return getJsonReport(); + default: + throw new InvalidArgumentRuntimeException( + format("Unknown load progress report format: %s", format)); + } + } + + private String getTextReport() { + StringBuilder progress = new StringBuilder(); + progress.append( + format("\tSettings:\tbandwidth: %s\tverify: %s%n", + mBandwidth == null ? "unlimited" : mBandwidth, + mVerificationEnabled)); + progress.append(format("\tJob State: %s%s%n", mJobState, + mFailureReason == null + ? "" : format( + " (%s: %s)", + mFailureReason.getClass().getName(), + mFailureReason.getMessage()))); + if (mVerbose && mFailureReason != null) { + for (StackTraceElement stack : mFailureReason.getStackTrace()) { + progress.append(format("\t\t%s%n", stack.toString())); + } + } + progress.append(format("\tFiles Processed: %d%n", mProcessedFileCount)); + progress.append(format("\tBytes Loaded: %s%s%n", + FormatUtils.getSizeFromBytes(mLoadedByteCount), + mTotalByteCount == null + ? 
"" : format(" out of %s", FormatUtils.getSizeFromBytes(mTotalByteCount)))); + if (mThroughput != null) { + progress.append(format("\tThroughput: %s/s%n", + FormatUtils.getSizeFromBytes(mThroughput))); + } + progress.append(format("\tBlock load failure rate: %.2f%%%n", mFailurePercentage)); + progress.append(format("\tFiles Failed: %s%n", mFailedFileCount)); + if (mVerbose && mFailedFilesWithReasons != null) { + mFailedFilesWithReasons.forEach((fileName, reason) -> + progress.append(format("\t\t%s: %s%n", fileName, reason))); + } + return progress.toString(); + } + + private String getJsonReport() { + try { + return new ObjectMapper() + .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY) + .setSerializationInclusion(JsonInclude.Include.NON_NULL) + .writeValueAsString(this); + } catch (JsonProcessingException e) { + throw new InternalRuntimeException("Failed to convert LoadProgressReport to JSON", e); + } + } + } + + // metrics + public static final Counter JOB_LOAD_SUCCESS = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_SUCCESS.getName()); + public static final Counter JOB_LOAD_FAIL = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_FAIL.getName()); + public static final Counter JOB_LOAD_BLOCK_COUNT = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_COUNT.getName()); + public static final Counter JOB_LOAD_BLOCK_FAIL = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_FAIL.getName()); + public static final Counter JOB_LOAD_BLOCK_SIZE = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_SIZE.getName()); + public static final Meter JOB_LOAD_RATE = + MetricsSystem.meter(MetricKey.MASTER_JOB_LOAD_RATE.getName()); +} diff --git a/core/server/master/src/main/java/alluxio/master/job/LoadJobFactory.java b/core/server/master/src/main/java/alluxio/master/job/LoadJobFactory.java new file mode 100644 index 000000000000..c1a5fc6cd7d5 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/LoadJobFactory.java @@ -0,0 +1,65 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.job; + +import alluxio.grpc.LoadJobPOptions; +import alluxio.job.LoadJobRequest; +import alluxio.master.file.FileSystemMaster; +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobFactory; +import alluxio.security.User; +import alluxio.security.authentication.AuthenticatedClientUser; + +import java.util.Optional; +import java.util.OptionalLong; +import java.util.UUID; + +/** + * Factory for creating {@link LoadJob}s that get file infos from master. + */ +public class LoadJobFactory implements JobFactory { + + private final FileSystemMaster mFsMaster; + private final LoadJobRequest mRequest; + + /** + * Create factory. 
+ * @param request load job request + * @param fsMaster file system master + */ + public LoadJobFactory(LoadJobRequest request, FileSystemMaster fsMaster) { + mFsMaster = fsMaster; + mRequest = request; + } + + @Override + public Job create() { + LoadJobPOptions options = mRequest.getOptions(); + String path = mRequest.getPath(); + OptionalLong bandwidth = + options.hasBandwidth() ? OptionalLong.of(options.getBandwidth()) : OptionalLong.empty(); + boolean partialListing = options.hasPartialListing() && options.getPartialListing(); + boolean verificationEnabled = options.hasVerify() && options.getVerify(); + FileIterable fileIterator = new FileIterable(mFsMaster, path, Optional + .ofNullable(AuthenticatedClientUser.getOrNull()) + .map(User::getName), partialListing, + LoadJob.QUALIFIED_FILE_FILTER); + Optional user = Optional + .ofNullable(AuthenticatedClientUser.getOrNull()) + .map(User::getName); + return new LoadJob(path, user, UUID.randomUUID().toString(), + bandwidth, + partialListing, + verificationEnabled, fileIterator); + } +} + diff --git a/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java b/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java index aa74350e257a..eae8d452c37f 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java +++ b/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java @@ -24,8 +24,11 @@ import alluxio.master.MasterContext; import alluxio.master.PrimarySelector; import alluxio.master.journal.raft.RaftJournalSystem; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.util.executor.ExecutorServiceFactories; +import io.grpc.ServerInterceptors; + import java.io.IOException; import java.util.HashMap; import java.util.Map; @@ -111,7 +114,9 @@ public String getName() { public Map getServices() { Map services = new HashMap<>(); services.put(alluxio.grpc.ServiceType.JOURNAL_MASTER_CLIENT_SERVICE, - new GrpcService(new JournalMasterClientServiceHandler(this))); + new GrpcService(ServerInterceptors.intercept( + new JournalMasterClientServiceHandler(this), + new ClientContextServerInjector()))); return services; } } diff --git a/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java b/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java index d7b53bf8cced..974f5ac7d305 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java +++ b/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java @@ -12,32 +12,36 @@ package alluxio.master.journal.tool; import alluxio.master.journal.JournalEntryAssociation; -import alluxio.master.journal.checkpoint.CheckpointInputStream; +import alluxio.master.journal.checkpoint.OptimizedCheckpointInputStream; import alluxio.master.journal.raft.RaftJournalSystem; import alluxio.master.journal.raft.RaftJournalUtils; +import alluxio.master.journal.raft.SnapshotDirStateMachineStorage; import alluxio.proto.journal.Journal; import alluxio.util.io.FileUtils; import com.google.common.base.Preconditions; +import org.apache.ratis.io.MD5Hash; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.raftlog.segmented.LogSegment; import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.storage.FileInfo; import org.apache.ratis.server.storage.RaftStorage; import org.apache.ratis.server.storage.StorageImplUtils; 
+import org.apache.ratis.statemachine.SnapshotInfo; import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; +import org.apache.ratis.util.MD5FileUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedOutputStream; -import java.io.DataInputStream; import java.io.File; -import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; +import java.nio.file.Path; import java.nio.file.Paths; +import java.security.MessageDigest; import java.util.List; /** @@ -121,27 +125,31 @@ private void readRatisSnapshotFromDir() throws IOException { RaftStorage.StartupOption.RECOVER, RaftServerConfigKeys.STORAGE_FREE_SPACE_MIN_DEFAULT.getSize())) { storage.initialize(); - SimpleStateMachineStorage stateMachineStorage = new SimpleStateMachineStorage(); + SnapshotDirStateMachineStorage stateMachineStorage = new SnapshotDirStateMachineStorage(); stateMachineStorage.init(storage); - SingleFileSnapshotInfo currentSnapshot = stateMachineStorage.getLatestSnapshot(); + SnapshotInfo currentSnapshot = stateMachineStorage.getLatestSnapshot(); if (currentSnapshot == null) { LOG.debug("No snapshot found"); return; } - final File snapshotFile = currentSnapshot.getFile().getPath().toFile(); + File snapshotDir = new File(stateMachineStorage.getSnapshotDir(), + SimpleStateMachineStorage.getSnapshotFileName(currentSnapshot.getTerm(), + currentSnapshot.getIndex())); String checkpointPath = String.format("%s-%s-%s", mCheckpointsDir, currentSnapshot.getIndex(), - snapshotFile.lastModified()); + snapshotDir.lastModified()); + new File(checkpointPath).mkdirs(); - try (DataInputStream inputStream = new DataInputStream(new FileInputStream(snapshotFile))) { - LOG.debug("Reading snapshot-Id: {}", inputStream.readLong()); - try (CheckpointInputStream checkpointStream = new CheckpointInputStream(inputStream)) { - readCheckpoint(checkpointStream, Paths.get(checkpointPath)); - } catch (Exception e) { - LOG.error("Failed to read snapshot from journal.", e); + for (FileInfo file : currentSnapshot.getFiles()) { + if (file.getFileDigest() != null) { + File snapshotFile = new File(snapshotDir, file.getPath().toString()); + Path humanReadableFile = Paths.get(checkpointPath, file.getPath().toString()); + MessageDigest md5 = MD5Hash.getDigester(); + try (OptimizedCheckpointInputStream is = + new OptimizedCheckpointInputStream(snapshotFile, md5)) { + readCheckpoint(is, humanReadableFile); + } + MD5FileUtil.verifySavedMD5(snapshotFile, new MD5Hash(md5.digest())); } - } catch (Exception e) { - LOG.error("Failed to load snapshot {}", snapshotFile, e); - throw e; } } } diff --git a/core/server/master/src/main/java/alluxio/master/journal/tool/UfsJournalDumper.java b/core/server/master/src/main/java/alluxio/master/journal/tool/UfsJournalDumper.java index 59f3f69952a7..b0d4f754612b 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/tool/UfsJournalDumper.java +++ b/core/server/master/src/main/java/alluxio/master/journal/tool/UfsJournalDumper.java @@ -28,7 +28,6 @@ import java.io.PrintStream; import java.net.URI; import java.net.URISyntaxException; -import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -67,9 +66,8 @@ public void dumpJournal() throws Throwable { switch (state) { case CHECKPOINT: try (CheckpointInputStream checkpoint = reader.getCheckpoint()) { - Path dir = Paths.get(mCheckpointsDir + "-" + 
reader.getNextSequenceNumber()); - Files.createDirectories(dir); - readCheckpoint(checkpoint, dir); + Path path = Paths.get(mCheckpointsDir + "-" + reader.getNextSequenceNumber()); + readCheckpoint(checkpoint, path); } break; case LOG: diff --git a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java index c818f58314df..d7b5ad2bd7cc 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java @@ -16,6 +16,7 @@ import alluxio.AlluxioURI; import alluxio.Constants; +import alluxio.ProjectConstants; import alluxio.RestUtils; import alluxio.RuntimeConstants; import alluxio.StorageTierAssoc; @@ -40,6 +41,7 @@ import alluxio.master.file.FileSystemMaster; import alluxio.master.file.contexts.ListStatusContext; import alluxio.master.file.meta.MountTable; +import alluxio.master.throttle.SystemMonitor.SystemStatus; import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; import alluxio.security.authentication.AuthenticatedClientUser; @@ -57,11 +59,11 @@ import alluxio.util.webui.UIFileInfo; import alluxio.util.webui.WebUtils; import alluxio.web.MasterWebServer; -import alluxio.wire.Address; import alluxio.wire.AlluxioMasterInfo; import alluxio.wire.BlockLocation; import alluxio.wire.Capacity; import alluxio.wire.ConfigCheckReport; +import alluxio.wire.ConfigHash; import alluxio.wire.FileBlockInfo; import alluxio.wire.FileInfo; import alluxio.wire.MasterInfo; @@ -98,7 +100,6 @@ import java.io.FilenameFilter; import java.io.IOException; import java.io.InputStream; -import java.net.InetSocketAddress; import java.net.URLDecoder; import java.time.Instant; import java.time.ZoneOffset; @@ -209,7 +210,8 @@ public Response getInfo(@QueryParam(QUERY_RAW_CONFIGURATION) final Boolean rawCo .setRpcAddress(mMasterProcess.getRpcAddress().toString()) .setStartTimeMs(mMasterProcess.getStartTimeMs()) .setTierCapacity(getTierCapacityInternal()).setUfsCapacity(getUfsCapacityInternal()) - .setUptimeMs(mMasterProcess.getUptimeMs()).setVersion(RuntimeConstants.VERSION) + .setUptimeMs(mMasterProcess.getUptimeMs()) + .setVersion(RuntimeConstants.VERSION).setRevision(ProjectConstants.REVISION) .setWorkers(mBlockMaster.getWorkerInfoList()); }, Configuration.global()); } @@ -262,6 +264,7 @@ public Response getWebUIOverview() { .setStartTime(CommonUtils.convertMsToDate(mMetaMaster.getStartTimeMs(), Configuration.getString(PropertyKey.USER_DATE_FORMAT_PATTERN))) .setVersion(RuntimeConstants.VERSION) + .setRevision(ProjectConstants.REVISION) .setLiveWorkerNodes(Integer.toString(mBlockMaster.getWorkerCount())) .setCapacity(FormatUtils.getSizeFromBytes(mBlockMaster.getCapacityBytes())) .setClusterId(mMetaMaster.getClusterID()) @@ -370,6 +373,14 @@ public Response getWebUIOverview() { if (leaderIdGauge != null) { response.setLeaderId((String) leaderIdGauge.getValue()); } + // Add master system status + Gauge systemStatusGauge = MetricsSystem.METRIC_REGISTRY.getGauges() + .get("Master.system.status"); + if (systemStatusGauge != null) { + SystemStatus systemStatus = (SystemStatus) systemStatusGauge.getValue(); + response.setSystemStatus(systemStatus.toString()); + } + return response; }, Configuration.global()); } @@ -678,7 +689,8 @@ public Response getWebUILogs(@DefaultValue("") @QueryParam("path") String reques @DefaultValue("") @QueryParam("end") String 
requestEnd, @DefaultValue("20") @QueryParam("limit") String requestLimit) { return RestUtils.call(() -> { - FilenameFilter filenameFilter = (dir, name) -> name.toLowerCase().endsWith(".log"); + FilenameFilter filenameFilter = (dir, name) -> + Constants.LOG_FILE_PATTERN.matcher(name.toLowerCase()).matches(); MasterWebUILogs response = new MasterWebUILogs(); if (!Configuration.getBoolean(PropertyKey.WEB_FILE_INFO_ENABLED)) { @@ -686,9 +698,6 @@ public Response getWebUILogs(@DefaultValue("") @QueryParam("path") String reques } response.setDebug(Configuration.getBoolean(PropertyKey.DEBUG)).setInvalidPathError("") .setViewingOffset(0).setCurrentPath(""); - //response.setDownloadLogFile(1); - //response.setBaseUrl("./browseLogs"); - //response.setShowPermissions(false); String logsPath = Configuration.getString(PropertyKey.LOGS_DIR); File logsDir = new File(logsPath); @@ -733,7 +742,6 @@ public Response getWebUILogs(@DefaultValue("") @QueryParam("path") String reques } } else { // Request a specific log file. - // Only allow filenames as the path, to avoid arbitrary local path lookups. requestFile = new File(requestFile).getName(); response.setCurrentPath(requestFile); @@ -810,12 +818,11 @@ public Response getWebUIConfiguration() { MasterWebUIConfiguration response = new MasterWebUIConfiguration(); response.setWhitelist(mFileSystemMaster.getWhiteList()); - + alluxio.wire.Configuration conf = mMetaMaster.getConfiguration( + GetConfigurationPOptions.newBuilder().setRawValue(true).build()); TreeSet> sortedProperties = new TreeSet<>(); Set alluxioConfExcludes = Sets.newHashSet(PropertyKey.MASTER_WHITELIST.toString()); - for (ConfigProperty configProperty : mMetaMaster - .getConfiguration(GetConfigurationPOptions.newBuilder().setRawValue(true).build()) - .toProto().getClusterConfigsList()) { + for (ConfigProperty configProperty : conf.toProto().getClusterConfigsList()) { String confName = configProperty.getName(); if (!alluxioConfExcludes.contains(confName)) { sortedProperties.add(new ImmutableTriple<>(confName, @@ -825,7 +832,8 @@ public Response getWebUIConfiguration() { } response.setConfiguration(sortedProperties); - + response.setConfigHash(new ConfigHash(conf.getClusterConfHash(), conf.getPathConfHash(), + conf.getClusterConfLastUpdateTime(), conf.getPathConfLastUpdateTime())); return response; }, Configuration.global()); } @@ -863,24 +871,34 @@ public Response getWebUIWorkers() { @GET @Path(WEBUI_MASTERS) public Response getWebUIMasters() { - return RestUtils.call(() -> { - MasterWebUIMasters response = new MasterWebUIMasters(); - - response.setDebug(Configuration.getBoolean(PropertyKey.DEBUG)); - - MasterInfo[] failedMasterInfos = mMetaMaster.getLostMasterInfos(); - response.setFailedMasterInfos(failedMasterInfos); - - MasterInfo[] normalMasterInfos = mMetaMaster.getMasterInfos(); - response.setNormalMasterInfos(normalMasterInfos); - - InetSocketAddress leaderMasterAddress = mMasterProcess.getRpcAddress(); - MasterInfo leaderMasterInfo = new MasterInfo(MASTER_ID_NULL, - new Address(leaderMasterAddress.getHostString(), leaderMasterAddress.getPort()), - System.currentTimeMillis()); - response.setLeaderMasterInfo(leaderMasterInfo); - return response; - }, Configuration.global()); + final Map gauges = MetricsSystem.METRIC_REGISTRY.getGauges(); + Gauge lastCheckpointGauge = gauges + .get(MetricKey.MASTER_JOURNAL_LAST_CHECKPOINT_TIME.getName()); + long lastCheckpointTime = lastCheckpointGauge == null ? 
0 + : (long) lastCheckpointGauge.getValue(); + Gauge journalEntriesGauge = gauges + .get(MetricKey.MASTER_JOURNAL_ENTRIES_SINCE_CHECKPOINT.getName()); + long journalEntriesSinceCheckpoint = journalEntriesGauge == null ? 0 + : (long) journalEntriesGauge.getValue(); + + Gauge lastGainPrimacyGuage = gauges + .get(MetricKey.MASTER_LAST_GAIN_PRIMACY_TIME.getName()); + long lastGainPrimacyTime = lastGainPrimacyGuage == null ? 0 + : (long) lastGainPrimacyGuage.getValue(); + + return RestUtils.call(() -> new MasterWebUIMasters() + .setDebug(Configuration.getBoolean(PropertyKey.DEBUG)) + .setLostMasterInfos(mMetaMaster.getLostMasterInfos()) + .setStandbyMasterInfos(mMetaMaster.getStandbyMasterInfos()) + .setPrimaryMasterInfo(new MasterInfo(MASTER_ID_NULL, mMetaMaster.getMasterAddress()) + .setLastUpdatedTimeMs(System.currentTimeMillis()) + .setStartTimeMs(mMasterProcess.getStartTimeMs()) + .setGainPrimacyTimeMs(lastGainPrimacyTime) + .setLastCheckpointTimeMs(lastCheckpointTime) + .setJournalEntriesSinceCheckpoint(journalEntriesSinceCheckpoint) + .setVersion(ProjectConstants.VERSION) + .setRevision(ProjectConstants.REVISION)), + Configuration.global()); } /** @@ -971,8 +989,11 @@ public Response getWebUIMetrics() { .setTotalBytesReadRemote(FormatUtils.getSizeFromBytes(bytesReadRemote)) .setTotalBytesReadUfs(FormatUtils.getSizeFromBytes(bytesReadUfs)); + Long bytesReadCache = counters.get( + MetricKey.CLUSTER_BYTES_READ_CACHE.getName()).getCount(); + // cluster cache hit and miss - long bytesReadTotal = bytesReadLocal + bytesReadRemote + bytesReadDomainSocket; + long bytesReadTotal = bytesReadLocal + bytesReadCache + bytesReadUfs; double cacheHitLocalPercentage = (bytesReadTotal > 0) ? (100D * (bytesReadLocal + bytesReadDomainSocket) / bytesReadTotal) : 0; diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index 8b1631c4f43e..bdd3cc06c70c 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -20,6 +20,7 @@ import alluxio.conf.Configuration; import alluxio.conf.ConfigurationValueOptions; import alluxio.conf.PropertyKey; +import alluxio.conf.ReconfigurableRegistry; import alluxio.conf.Source; import alluxio.exception.AlluxioException; import alluxio.exception.status.NotFoundException; @@ -27,12 +28,19 @@ import alluxio.grpc.BackupPOptions; import alluxio.grpc.BackupPRequest; import alluxio.grpc.BackupStatusPRequest; +import alluxio.grpc.BuildVersion; import alluxio.grpc.GetConfigurationPOptions; import alluxio.grpc.GrpcService; +import alluxio.grpc.MasterHeartbeatPOptions; import alluxio.grpc.MetaCommand; +import alluxio.grpc.NetAddress; +import alluxio.grpc.ProxyHeartbeatPOptions; +import alluxio.grpc.ProxyHeartbeatPRequest; +import alluxio.grpc.ProxyStatus; import alluxio.grpc.RegisterMasterPOptions; import alluxio.grpc.Scope; import alluxio.grpc.ServiceType; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatExecutor; import alluxio.heartbeat.HeartbeatThread; @@ -52,6 +60,7 @@ import alluxio.proto.journal.Journal; import alluxio.proto.journal.Meta; import alluxio.resource.CloseableIterator; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.underfs.UfsManager; import alluxio.util.ConfigurationUtils; import alluxio.util.IdUtils; @@ -66,6 +75,7 @@ import 
alluxio.wire.ConfigHash; import com.google.common.collect.ImmutableSet; +import io.grpc.ServerInterceptors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -73,12 +83,14 @@ import java.net.InetSocketAddress; import java.text.MessageFormat; import java.time.Clock; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executors; import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; @@ -119,6 +131,11 @@ public final class DefaultMetaMaster extends CoreMaster implements MetaMaster { private final IndexedSet mLostMasters = new IndexedSet<>(ID_INDEX, ADDRESS_INDEX); + /** Keeps track of proxies which are in communication with the primary master. */ + private final Map mProxies = new ConcurrentHashMap<>(); + /** Keeps track of proxies which are no longer in communication with the primary master. */ + private final Map mLostProxies = new ConcurrentHashMap<>(); + /** The connect address for the rpc server. */ private final InetSocketAddress mRpcConnectAddress = NetworkAddressUtils.getConnectAddress(NetworkAddressUtils.ServiceType.MASTER_RPC, @@ -139,7 +156,7 @@ public final class DefaultMetaMaster extends CoreMaster implements MetaMaster { /** Path level properties. */ private final PathProperties mPathProperties; - /** Persisted state for MetaMaster. */ + /** Persisted state for {@link MetaMaster}. */ private final State mState; /** Value to be used for the cluster ID when not assigned. */ @@ -152,7 +169,7 @@ public final class DefaultMetaMaster extends CoreMaster implements MetaMaster { private final JournalSpaceMonitor mJournalSpaceMonitor; /** - * Journaled state for MetaMaster. + * Journaled state for {@link MetaMaster}. */ @NotThreadSafe public static final class State implements alluxio.master.journal.Journaled { @@ -256,17 +273,31 @@ public CloseableIterator getJournalEntryIterator() { public Map getServices() { Map services = new HashMap<>(); services.put(ServiceType.META_MASTER_CONFIG_SERVICE, - new GrpcService(new MetaMasterConfigurationServiceHandler(this)).disableAuthentication()); + new GrpcService(ServerInterceptors.intercept( + new MetaMasterConfigurationServiceHandler(this), + new ClientContextServerInjector())).disableAuthentication()); services.put(ServiceType.META_MASTER_CLIENT_SERVICE, - new GrpcService(new MetaMasterClientServiceHandler(this))); + new GrpcService(ServerInterceptors.intercept( + new MetaMasterClientServiceHandler(this), + new ClientContextServerInjector()))); services.put(ServiceType.META_MASTER_MASTER_SERVICE, - new GrpcService(new MetaMasterMasterServiceHandler(this))); + new GrpcService(ServerInterceptors.intercept( + new MetaMasterMasterServiceHandler(this), + new ClientContextServerInjector()))); + services.put(ServiceType.META_MASTER_PROXY_SERVICE, + new GrpcService(new MetaMasterProxyServiceHandler(this))); // Add backup role services. 
services.putAll(mBackupRole.getRoleServices()); services.putAll(mJournalSystem.getJournalServices()); return services; } + @Override + public Map getStandbyServices() { + // for snapshot propagation + return new HashMap<>(mJournalSystem.getJournalServices()); + } + @Override public String getName() { return Constants.META_MASTER_NAME; @@ -291,13 +322,20 @@ public void start(Boolean isPrimary) throws IOException { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_LOST_MASTER_DETECTION, new LostMasterDetectionHeartbeatExecutor(), - (int) Configuration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_LOG_CONFIG_REPORT_SCHEDULING, new LogConfigReportHeartbeatExecutor(), - (int) Configuration - .getMs(PropertyKey.MASTER_LOG_CONFIG_REPORT_HEARTBEAT_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_LOG_CONFIG_REPORT_HEARTBEAT_INTERVAL)), + Configuration.global(), mMasterContext.getUserState())); + getExecutorService().submit(new HeartbeatThread( + HeartbeatContext.MASTER_LOST_PROXY_DETECTION, + new LostProxyDetectionHeartbeatExecutor(), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_PROXY_CHECK_HEARTBEAT_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); if (Configuration.getBoolean(PropertyKey.MASTER_DAILY_BACKUP_ENABLED)) { @@ -308,7 +346,8 @@ public void start(Boolean isPrimary) throws IOException { if (mJournalSpaceMonitor != null) { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_JOURNAL_SPACE_MONITOR, mJournalSpaceMonitor, - Configuration.getMs(PropertyKey.MASTER_JOURNAL_SPACE_MONITOR_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_JOURNAL_SPACE_MONITOR_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); } if (mState.getClusterID().equals(INVALID_CLUSTER_ID)) { @@ -321,7 +360,8 @@ public void start(Boolean isPrimary) throws IOException { && !Configuration.getBoolean(PropertyKey.TEST_MODE)) { getExecutorService().submit(new HeartbeatThread(HeartbeatContext.MASTER_UPDATE_CHECK, new UpdateChecker(this), - (int) Configuration.getMs(PropertyKey.MASTER_UPDATE_CHECK_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_UPDATE_CHECK_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); } } else { @@ -336,7 +376,8 @@ public void start(Boolean isPrimary) throws IOException { .newBuilder(ClientContext.create(Configuration.global())).build()); getExecutorService().submit(new HeartbeatThread(HeartbeatContext.META_MASTER_SYNC, new MetaMasterSync(mMasterAddress, metaMasterClient), - (int) Configuration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); LOG.info("Standby master with address {} starts sending heartbeat to leader master.", mMasterAddress); @@ -422,6 +463,7 @@ public alluxio.wire.Configuration getConfiguration(GetConfigurationPOptions opti // NOTE(cc): assumes that Configuration is read-only when master is running, otherwise, // the following hash might not correspond to the above cluster configuration. 
builder.setClusterConfHash(Configuration.hash()); + builder.setClusterConfLastUpdateTime(Configuration.getLastUpdateTime()); } if (!options.getIgnorePathConf()) { @@ -430,6 +472,7 @@ public alluxio.wire.Configuration getConfiguration(GetConfigurationPOptions opti properties.forEach((key, value) -> builder.addPathProperty(path, key, value))); builder.setPathConfHash(pathProperties.getHash()); + builder.setPathConfLastUpdateTime(pathProperties.getLastUpdateTime()); } return builder.build(); @@ -437,7 +480,8 @@ public alluxio.wire.Configuration getConfiguration(GetConfigurationPOptions opti @Override public ConfigHash getConfigHash() { - return new ConfigHash(Configuration.hash(), mPathProperties.hash()); + return new ConfigHash(Configuration.hash(), mPathProperties.hash(), + Configuration.getLastUpdateTime(), mPathProperties.getLastUpdateTime()); } @Override @@ -478,6 +522,11 @@ public boolean getNewerVersionAvailable() { return mNewerVersionAvailable; } + @Override + public Address getMasterAddress() { + return mMasterAddress; + } + @Override public List
<Address> getMasterAddresses() { return mMasterConfigStore.getLiveNodeAddresses(); @@ -489,24 +538,27 @@ public List<Address>
getWorkerAddresses() { } @Override - public alluxio.wire.MasterInfo[] getMasterInfos() { - alluxio.wire.MasterInfo[] masterInfos = new alluxio.wire.MasterInfo[mMasters.size()]; - int indexNum = 0; - for (MasterInfo master : mMasters) { - masterInfos[indexNum] = new alluxio.wire.MasterInfo(master.getId(), - master.getAddress(), master.getLastUpdatedTimeMs()); - indexNum++; - } - return masterInfos; + public alluxio.wire.MasterInfo[] getStandbyMasterInfos() { + return toWire(mMasters); } @Override public alluxio.wire.MasterInfo[] getLostMasterInfos() { - alluxio.wire.MasterInfo[] masterInfos = new alluxio.wire.MasterInfo[mLostMasters.size()]; + return toWire(mLostMasters); + } + + private static alluxio.wire.MasterInfo[] toWire(final IndexedSet masters) { + alluxio.wire.MasterInfo[] masterInfos = new alluxio.wire.MasterInfo[masters.size()]; int indexNum = 0; - for (MasterInfo master : mLostMasters) { - masterInfos[indexNum] = new alluxio.wire.MasterInfo(master.getId(), - master.getAddress(), master.getLastUpdatedTimeMs()); + for (MasterInfo master : masters) { + masterInfos[indexNum] = new alluxio.wire.MasterInfo(master.getId(), master.getAddress()) + .setLastUpdatedTimeMs(master.getLastUpdatedTimeMs()) + .setStartTimeMs(master.getStartTimeMs()) + .setLosePrimacyTimeMs(master.getLosePrimacyTimeMs()) + .setLastCheckpointTimeMs(master.getLastCheckpointTimeMs()) + .setJournalEntriesSinceCheckpoint(master.getJournalEntriesSinceCheckpoint()) + .setVersion(master.getVersion()) + .setRevision(master.getRevision()); indexNum++; } return masterInfos; @@ -574,7 +626,8 @@ public boolean isInSafeMode() { } @Override - public MetaCommand masterHeartbeat(long masterId) { + public MetaCommand masterHeartbeat(long masterId, MasterHeartbeatPOptions options) { + LOG.debug("A heartbeat request was received from Standby master: {}.", masterId); MasterInfo master = mMasters.getFirstByField(ID_INDEX, masterId); if (master == null) { LOG.warn("Could not find master id: {} for heartbeat.", masterId); @@ -582,6 +635,12 @@ public MetaCommand masterHeartbeat(long masterId) { } master.updateLastUpdatedTimeMs(); + if (options.hasLastCheckpointTime()) { + master.setLastCheckpointTimeMs(options.getLastCheckpointTime()); + } + if (options.hasJournalEntriesSinceCheckpoint()) { + master.setJournalEntriesSinceCheckpoint(options.getJournalEntriesSinceCheckpoint()); + } return MetaCommand.MetaCommand_Nothing; } @@ -595,12 +654,47 @@ public void masterRegister(long masterId, RegisterMasterPOptions options) } master.updateLastUpdatedTimeMs(); + if (options.hasStartTimeMs()) { + master.setStartTimeMs(options.getStartTimeMs()); + } + if (options.hasLosePrimacyTimeMs()) { + master.setLosePrimacyTimeMs(options.getLosePrimacyTimeMs()); + } + if (options.hasVersion()) { + master.setVersion(options.getVersion()); + } + if (options.hasRevision()) { + master.setRevision(options.getRevision()); + } mMasterConfigStore.registerNewConf(master.getAddress(), options.getConfigsList()); LOG.info("registerMaster(): master: {}", master); } + @Override + public void proxyHeartbeat(ProxyHeartbeatPRequest request) { + LOG.debug("Received proxy heartbeat {}", request); + ProxyHeartbeatPOptions options = request.getOptions(); + NetAddress address = options.getProxyAddress(); + mProxies.compute(address, (key, proxyInfo) -> { + if (proxyInfo == null) { + ProxyInfo info = new ProxyInfo(address); + info.setStartTimeMs(options.getStartTime()); + info.setVersion(options.getVersion().getVersion()); + info.setRevision(options.getVersion().getRevision()); + 
info.updateLastHeartbeatTimeMs(); + return info; + } else { + proxyInfo.setVersion(options.getVersion().getVersion()); + proxyInfo.setRevision(options.getVersion().getRevision()); + proxyInfo.updateLastHeartbeatTimeMs(); + return proxyInfo; + } + }); + mLostProxies.remove(address); + } + @Override public CheckpointName getCheckpointName() { return CheckpointName.META_MASTER; @@ -631,7 +725,7 @@ public void resetState() { @Override public Map updateConfiguration(Map propertiesMap) { Map result = new HashMap<>(); - int successCount = 0; + Map changedProperties = new HashMap<>(); for (Map.Entry entry : propertiesMap.entrySet()) { try { PropertyKey key = PropertyKey.fromString(entry.getKey()); @@ -641,7 +735,7 @@ public Map updateConfiguration(Map propertiesMa Object value = key.parseValue(entry.getValue()); Configuration.set(key, value, Source.RUNTIME); result.put(entry.getKey(), true); - successCount++; + changedProperties.put(key, Configuration.get(key)); LOG.info("Property {} has been updated to \"{}\" from \"{}\"", key.getName(), entry.getValue(), oldValue); } else { @@ -653,7 +747,35 @@ public Map updateConfiguration(Map propertiesMa LOG.error("Failed to update property {} to {}", entry.getKey(), entry.getValue(), e); } } - LOG.debug("Update {} properties, succeed {}.", propertiesMap.size(), successCount); + LOG.debug("Updating {} properties, {} succeed.", propertiesMap.size(), + changedProperties.size()); + if (changedProperties.size() > 0) { + ReconfigurableRegistry.update(changedProperties); + } + return result; + } + + @Override + public List listProxyStatus() { + List result = new ArrayList<>(); + for (Map.Entry entry : mProxies.entrySet()) { + ProxyInfo info = entry.getValue(); + result.add(ProxyStatus.newBuilder().setAddress(entry.getKey()) + .setState("ACTIVE") + .setVersion(BuildVersion.newBuilder() + .setVersion(info.getVersion()).setRevision(info.getRevision()).build()) + .setStartTime(info.getStartTimeMs()) + .setLastHeartbeatTime(info.getLastHeartbeatTimeMs()).build()); + } + for (Map.Entry entry : mLostProxies.entrySet()) { + ProxyInfo info = entry.getValue(); + result.add(ProxyStatus.newBuilder().setAddress(entry.getKey()) + .setState("LOST") + .setVersion(BuildVersion.newBuilder() + .setVersion(info.getVersion()).setRevision(info.getRevision()).build()) + .setStartTime(info.getStartTimeMs()) + .setLastHeartbeatTime(info.getLastHeartbeatTimeMs()).build()); + } return result; } @@ -669,7 +791,7 @@ public LostMasterDetectionHeartbeatExecutor() { } @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { long masterTimeoutMs = Configuration.getMs(PropertyKey.MASTER_HEARTBEAT_TIMEOUT); for (MasterInfo master : mMasters) { synchronized (master) { @@ -691,6 +813,50 @@ public void close() { } } + /** + * Lost proxy periodic check. + */ + private final class LostProxyDetectionHeartbeatExecutor implements HeartbeatExecutor { + + /** + * Constructs a new {@link LostProxyDetectionHeartbeatExecutor}. 
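+     *
+     * <p>The two timeouts applied in {@code heartbeat} below, shown as a sketch of the
+     * proxy state flow (property names as used in the implementation):
+     * <pre>{@code
+     * long proxyTimeoutMs = Configuration.getMs(PropertyKey.MASTER_PROXY_TIMEOUT_MS);
+     * long deleteTimeoutMs = Configuration.getMs(PropertyKey.MASTER_LOST_PROXY_DELETION_TIMEOUT_MS);
+     * // silent for longer than proxyTimeoutMs: moved from mProxies to mLostProxies ("LOST")
+     * // LOST for longer than deleteTimeoutMs: removed from mLostProxies entirely
+     * }</pre>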
+ */ + public LostProxyDetectionHeartbeatExecutor() { + } + + @Override + public void heartbeat(long timeLimitMs) { + long proxyTimeoutMs = Configuration.getMs(PropertyKey.MASTER_PROXY_TIMEOUT_MS); + long masterProxyDeleteTimeoutMs = + Configuration.getMs(PropertyKey.MASTER_LOST_PROXY_DELETION_TIMEOUT_MS); + LOG.debug("LostProxyDetection checking proxies at {}", mProxies.keySet()); + mProxies.entrySet().removeIf(entry -> { + final long lastUpdate = mClock.millis() - entry.getValue().getLastHeartbeatTimeMs(); + if (lastUpdate > proxyTimeoutMs) { + LOG.warn("Proxy {} last heartbeat time {} was more than {}ms ago", + entry.getKey(), entry.getValue().getLastHeartbeatTimeMs(), proxyTimeoutMs); + mLostProxies.put(entry.getKey(), entry.getValue()); + return true; + } + return false; + }); + mLostProxies.entrySet().removeIf(entry -> { + final long lastUpdate = mClock.millis() - entry.getValue().getLastHeartbeatTimeMs(); + if (lastUpdate > masterProxyDeleteTimeoutMs) { + LOG.warn("Proxy {} has been LOST for more than {}ms. " + + "Master will forget about this Proxy", entry.getKey(), masterProxyDeleteTimeoutMs); + return true; + } + return false; + }); + } + + @Override + public void close() { + // Nothing to clean up + } + } + /** * Periodically log the config check report. */ @@ -698,7 +864,7 @@ private final class LogConfigReportHeartbeatExecutor implements HeartbeatExecuto private volatile boolean mFirst = true; @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { // Skip the first heartbeat since it happens before servers have time to register their // configurations. if (mFirst) { diff --git a/core/server/master/src/main/java/alluxio/master/meta/JournalSpaceMonitor.java b/core/server/master/src/main/java/alluxio/master/meta/JournalSpaceMonitor.java index 8b74f695e6a9..d917be9e348f 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/JournalSpaceMonitor.java +++ b/core/server/master/src/main/java/alluxio/master/meta/JournalSpaceMonitor.java @@ -169,7 +169,7 @@ public List getJournalDiskWarnings() { } @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { getJournalDiskWarnings().forEach(LOG::warn); } diff --git a/core/server/master/src/main/java/alluxio/master/meta/MasterInfo.java b/core/server/master/src/main/java/alluxio/master/meta/MasterInfo.java index 5b7742620886..871db4e26464 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MasterInfo.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MasterInfo.java @@ -29,6 +29,18 @@ public final class MasterInfo { private final long mId; /** Master's last updated time in ms. */ private long mLastUpdatedTimeMs; + /** Master's start time in ms. */ + private long mStartTimeMs = 0; + /** Master's last lose primacy time in ms. */ + private long mLosePrimacyTimeMs = 0; + /** Master's version. */ + private String mVersion = ""; + /** Master's revision. */ + private String mRevision = ""; + /** Master's last checkpoint time in ms. */ + private long mLastCheckpointTimeMs = 0; + /** Number of journal entries since last checkpoint. */ + private long mJournalEntriesSinceCheckpoint = 0; /** * Creates a new instance of {@link MasterInfo}. 
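The lost-proxy check above is a two-stage expiry: a proxy whose heartbeat is older than the proxy timeout moves from the active registry to the lost registry, and a lost proxy that stays silent past the deletion timeout is dropped entirely. A minimal self-contained sketch of the same pattern, assuming ConcurrentHashMap registries keyed by address and storing last-heartbeat timestamps; all names here are illustrative, not Alluxio APIs:

import java.time.Clock;
import java.util.concurrent.ConcurrentHashMap;

public class TwoStageExpiry {
  private final ConcurrentHashMap<String, Long> mActive = new ConcurrentHashMap<>();
  private final ConcurrentHashMap<String, Long> mLost = new ConcurrentHashMap<>();
  private final Clock mClock = Clock.systemUTC();

  void sweep(long timeoutMs, long deleteTimeoutMs) {
    long now = mClock.millis();
    // Stage 1: ACTIVE -> LOST once the last heartbeat is older than timeoutMs.
    mActive.entrySet().removeIf(e -> {
      if (now - e.getValue() > timeoutMs) {
        mLost.put(e.getKey(), e.getValue());
        return true;
      }
      return false;
    });
    // Stage 2: LOST -> forgotten once silent past deleteTimeoutMs.
    mLost.entrySet().removeIf(e -> now - e.getValue() > deleteTimeoutMs);
  }
}

Doing the sweep with removeIf keeps each stage a single pass over the map, which is why the executor above needs no extra locking of its own.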
@@ -63,10 +75,82 @@ public long getLastUpdatedTimeMs() { return mLastUpdatedTimeMs; } + /** + * @return the start time of the master in ms + */ + public long getStartTimeMs() { + return mStartTimeMs; + } + + /** + * @return the last lose primacy time of the master in ms + */ + public long getLosePrimacyTimeMs() { + return mLosePrimacyTimeMs; + } + + /** + * @return the version of the master + */ + public String getVersion() { + return mVersion; + } + + /** + * @return the revision of the master + */ + public String getRevision() { + return mRevision; + } + + /** + * @return the time of last checkpoint + */ + public long getLastCheckpointTimeMs() { + return mLastCheckpointTimeMs; + } + + /** + * @return number of journal entries since last checkpoint + */ + public long getJournalEntriesSinceCheckpoint() { + return mJournalEntriesSinceCheckpoint; + } + @Override public String toString() { return MoreObjects.toStringHelper(this).add("id", mId).add("address", mAddress) - .add("lastUpdatedTimeMs", mLastUpdatedTimeMs).toString(); + .add("lastUpdatedTimeMs", mLastUpdatedTimeMs).add("startTimeMs", mStartTimeMs) + .add("losePrimacyTimeMs", mLosePrimacyTimeMs) + .add("version", mVersion).add("revision", mRevision).toString(); + } + + /** + * @param startTimeMs the start time of the master in ms + */ + public void setStartTimeMs(long startTimeMs) { + mStartTimeMs = startTimeMs; + } + + /** + * @param losePrimacyTimeMs the last primacy state change time of the master in ms + */ + public void setLosePrimacyTimeMs(long losePrimacyTimeMs) { + mLosePrimacyTimeMs = losePrimacyTimeMs; + } + + /** + * @param version the version of the master + */ + public void setVersion(String version) { + mVersion = version; + } + + /** + * @param revision the revision of the master + */ + public void setRevision(String revision) { + mRevision = revision; } /** @@ -75,4 +159,18 @@ public String toString() { public void updateLastUpdatedTimeMs() { mLastUpdatedTimeMs = System.currentTimeMillis(); } + + /** + * @param lastCheckpointTimeMs the time of last checkpoint + */ + public void setLastCheckpointTimeMs(long lastCheckpointTimeMs) { + mLastCheckpointTimeMs = lastCheckpointTimeMs; + } + + /** + * @param journalEntriesSinceCheckpoint number of journal entries since last checkpoint + */ + public void setJournalEntriesSinceCheckpoint(long journalEntriesSinceCheckpoint) { + mJournalEntriesSinceCheckpoint = journalEntriesSinceCheckpoint; + } } diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMaster.java index 3038e7830ca4..175077106312 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMaster.java @@ -15,7 +15,10 @@ import alluxio.exception.status.NotFoundException; import alluxio.exception.status.UnavailableException; import alluxio.grpc.GetConfigurationPOptions; +import alluxio.grpc.MasterHeartbeatPOptions; import alluxio.grpc.MetaCommand; +import alluxio.grpc.ProxyHeartbeatPRequest; +import alluxio.grpc.ProxyStatus; import alluxio.grpc.RegisterMasterPOptions; import alluxio.master.Master; import alluxio.master.backup.BackupOps; @@ -36,7 +39,6 @@ * The interface of meta master. 
*/ public interface MetaMaster extends BackupOps, Master { - /** * @return the cluster ID */ @@ -99,6 +101,11 @@ void setPathConfiguration(String path, Map properties) */ boolean getNewerVersionAvailable(); + /** + * @return the address of this master + */ + Address getMasterAddress(); + /** * @return the addresses of live masters */ @@ -133,12 +140,12 @@ void setPathConfiguration(String path, Map properties) int getWebPort(); /** - * @return a array of {@link MasterInfo}s of masters + * @return an array of {@link MasterInfo} of standby masters */ - MasterInfo[] getMasterInfos(); + MasterInfo[] getStandbyMasterInfos(); /** - * @return a array of {@link MasterInfo}s of lost masters + * @return an array of {@link MasterInfo} of lost masters */ MasterInfo[] getLostMasterInfos(); @@ -156,9 +163,10 @@ void setPathConfiguration(String path, Map properties) * A standby master periodically heartbeats with the leader master. * * @param masterId the master id + * @param options the options that contains optional master info * @return an optional command for the standby master to execute */ - MetaCommand masterHeartbeat(long masterId); + MetaCommand masterHeartbeat(long masterId, MasterHeartbeatPOptions options); /** * A standby master registers with the leader master. @@ -181,4 +189,18 @@ void setPathConfiguration(String path, Map properties) * @return the update properties status map */ Map updateConfiguration(Map propertiesMap); + + /** + * A Proxy periodically heartbeats with the primary master. + * + * @param request the heartbeat message + */ + void proxyHeartbeat(ProxyHeartbeatPRequest request); + + /** + * Lists information of all known Proxy instances. + * + * @return a list of status + */ + List listProxyStatus(); } diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterClientServiceHandler.java index cb33f8391d04..a6e4bbefe09b 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterClientServiceHandler.java @@ -24,9 +24,13 @@ import alluxio.grpc.GetConfigReportPResponse; import alluxio.grpc.GetMasterInfoPOptions; import alluxio.grpc.GetMasterInfoPResponse; +import alluxio.grpc.ListProxyStatusPRequest; +import alluxio.grpc.ListProxyStatusPResponse; import alluxio.grpc.MasterInfo; import alluxio.grpc.MasterInfoField; +import alluxio.grpc.MasterVersion; import alluxio.grpc.MetaMasterClientServiceGrpc; +import alluxio.grpc.NetAddress; import alluxio.master.StateLockOptions; import alluxio.master.journal.raft.RaftJournalSystem; import alluxio.wire.Address; @@ -139,6 +143,36 @@ public void getMasterInfo(GetMasterInfoPOptions options, masterInfo.setRaftJournal(mMetaMaster.getMasterContext().getJournalSystem() instanceof RaftJournalSystem); break; + case MASTER_VERSION: + masterInfo.addMasterVersions( + MasterVersion.newBuilder() + .setAddresses(NetAddress.newBuilder().setHost( + mMetaMaster.getRpcAddress().getHostName()) + .setRpcPort(mMetaMaster.getRpcAddress().getPort()).build()) + .setVersion(RuntimeConstants.VERSION) + .setState("PRIMARY") + .build() + ); + List standbyMasterVersions = + Arrays.stream(mMetaMaster.getStandbyMasterInfos()) + .map(it -> MasterVersion.newBuilder() + .setVersion(it.getVersion()) + .setAddresses(it.getAddress().toProto()) + .setState("STANDBY") + .build()) + .collect(Collectors.toList()); + + 
masterInfo.addAllMasterVersions(standbyMasterVersions); + List<MasterVersion> lostMasterVersions = + Arrays.stream(mMetaMaster.getLostMasterInfos()) + .map(it -> MasterVersion.newBuilder() + .setVersion(it.getVersion()) + .setAddresses(it.getAddress().toProto()) + .setState("LOST") + .build()) + .collect(Collectors.toList()); + masterInfo.addAllMasterVersions(lostMasterVersions); + break; default: LOG.warn("Unrecognized meta master info field: " + field); } @@ -154,4 +188,13 @@ public void checkpoint(CheckpointPOptions options, () -> CheckpointPResponse.newBuilder().setMasterHostname(mMetaMaster.checkpoint()).build(), "checkpoint", "options=%s", responseObserver, options); } + + @Override + public void listProxyStatus(ListProxyStatusPRequest request, + StreamObserver<ListProxyStatusPResponse> responseObserver) { + RpcUtils.call(LOG, + () -> ListProxyStatusPResponse.newBuilder() + .addAllProxyStatuses(mMetaMaster.listProxyStatus()).build(), + "listProxyStatus", "options=%s", responseObserver, request.getOptions()); + } } diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterMasterServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterMasterServiceHandler.java index 4af0992906a2..ea5a4eddf631 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterMasterServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterMasterServiceHandler.java @@ -68,8 +68,8 @@ public void registerMaster(RegisterMasterPRequest request, @Override public void masterHeartbeat(MasterHeartbeatPRequest request, StreamObserver<MasterHeartbeatPResponse> responseObserver) { - RpcUtils.call(LOG, () -> MasterHeartbeatPResponse.newBuilder() - .setCommand(mMetaMaster.masterHeartbeat(request.getMasterId())).build(), + RpcUtils.call(LOG, () -> MasterHeartbeatPResponse.newBuilder().setCommand( + mMetaMaster.masterHeartbeat(request.getMasterId(), request.getOptions())).build(), "masterHeartbeat", "request=%s", responseObserver, request); } } diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterProxyServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterProxyServiceHandler.java new file mode 100644 index 000000000000..97eeebfab155 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterProxyServiceHandler.java @@ -0,0 +1,52 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.meta; + +import alluxio.RpcUtils; +import alluxio.grpc.MetaMasterProxyServiceGrpc; +import alluxio.grpc.ProxyHeartbeatPRequest; +import alluxio.grpc.ProxyHeartbeatPResponse; + +import io.grpc.stub.StreamObserver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.concurrent.NotThreadSafe; + +/** + * This class is a gRPC handler for meta master RPCs invoked by an Alluxio Proxy. 
+ */ +@NotThreadSafe +public final class MetaMasterProxyServiceHandler + extends MetaMasterProxyServiceGrpc.MetaMasterProxyServiceImplBase { + private static final Logger LOG = LoggerFactory.getLogger(MetaMasterProxyServiceHandler.class); + + private final MetaMaster mMetaMaster; + + /** + * Creates a new instance of {@link MetaMasterProxyServiceHandler}. + * + * @param metaMaster the Alluxio meta master + */ + public MetaMasterProxyServiceHandler(MetaMaster metaMaster) { + mMetaMaster = metaMaster; + } + + @Override + public void proxyHeartbeat(ProxyHeartbeatPRequest request, + StreamObserver<ProxyHeartbeatPResponse> responseObserver) { + RpcUtils.call(LOG, () -> { + mMetaMaster.proxyHeartbeat(request); + return ProxyHeartbeatPResponse.newBuilder().build(); + }, "proxyHeartbeat", "request=%s", responseObserver, request); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java index f793f2d7fa34..87bddb267cf2 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java @@ -62,12 +62,14 @@ public MetaMasterSync(Address masterAddress, RetryHandlingMetaMasterMasterClient * Heartbeats to the leader master node. */ @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { MetaCommand command = null; try { if (mMasterId.get() == UNINITIALIZED_MASTER_ID) { setIdAndRegister(); } + LOG.debug("Standby master {} sends a heartbeat request to the leader master.", + mMasterId.get()); command = mMasterClient.heartbeat(mMasterId.get()); handleCommand(command); } catch (IOException e) { @@ -116,5 +118,7 @@ private void setIdAndRegister() throws IOException { } @Override - public void close() {} + public void close() { + mMasterClient.close(); + } } diff --git a/core/server/master/src/main/java/alluxio/master/meta/PathProperties.java b/core/server/master/src/main/java/alluxio/master/meta/PathProperties.java index 717871cc691d..a38a743d1a87 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/PathProperties.java +++ b/core/server/master/src/main/java/alluxio/master/meta/PathProperties.java @@ -66,7 +66,7 @@ public final class PathProperties implements DelegatingJournaled { */ public PathPropertiesView snapshot() { try (LockResource r = new LockResource(mLock.readLock())) { - return new PathPropertiesView(get(), hash()); + return new PathPropertiesView(get(), hash(), mHash.getLastUpdateTime()); } } @@ -154,6 +154,13 @@ public Journaled getDelegate() { return mState; } + /** + * @return the last update time of the properties + */ + public long getLastUpdateTime() { + return mHash.getLastUpdateTime(); + } + /** * Journaled state of path level properties. */ diff --git a/core/server/master/src/main/java/alluxio/master/meta/PathPropertiesView.java b/core/server/master/src/main/java/alluxio/master/meta/PathPropertiesView.java index 5eb1cdbb1fd2..2fc4831c8c6e 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/PathPropertiesView.java +++ b/core/server/master/src/main/java/alluxio/master/meta/PathPropertiesView.java @@ -19,16 +19,20 @@ public final class PathPropertiesView { private final Map<String, Map<String, String>> mProperties; private final String mHash; + private final long mLastUpdateTime; /** * Constructs a read-only view of path level properties. 
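The handler above is the server half of the proxy heartbeat; the sending half only needs message builders that already appear in this diff. A hedged sketch of what a proxy-side caller might look like; the stub variable, host, and port are illustrative placeholders, not confirmed Alluxio code:

ProxyHeartbeatPRequest request = ProxyHeartbeatPRequest.newBuilder()
    .setOptions(ProxyHeartbeatPOptions.newBuilder()
        .setProxyAddress(NetAddress.newBuilder()
            .setHost("proxy-host")      // illustrative address
            .setRpcPort(39999))         // illustrative port
        .setStartTime(System.currentTimeMillis())
        .setVersion(BuildVersion.newBuilder()
            .setVersion(ProjectConstants.VERSION)
            .setRevision(ProjectConstants.REVISION)))
    .build();
// stub is assumed to be a generated MetaMasterProxyService blocking stub
stub.proxyHeartbeat(request);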
* * @param properties map from path to properties * @param hash hash of all path level properties + * @param lastUpdateTime last update time */ - public PathPropertiesView(Map> properties, String hash) { + public PathPropertiesView(Map> properties, String hash, + long lastUpdateTime) { mProperties = properties; mHash = hash; + mLastUpdateTime = lastUpdateTime; } /** @@ -44,4 +48,11 @@ public Map> getProperties() { public String getHash() { return mHash; } + + /** + * @return last update time + */ + public long getLastUpdateTime() { + return mLastUpdateTime; + } } diff --git a/core/server/master/src/main/java/alluxio/master/meta/ProxyInfo.java b/core/server/master/src/main/java/alluxio/master/meta/ProxyInfo.java new file mode 100644 index 000000000000..3f8aecb121f2 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/meta/ProxyInfo.java @@ -0,0 +1,120 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.meta; + +import alluxio.grpc.NetAddress; +import alluxio.util.CommonUtils; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; + +import javax.annotation.concurrent.NotThreadSafe; + +/** + * Proxy information. + */ +@NotThreadSafe +public final class ProxyInfo { + /** Proxy's address. */ + private final NetAddress mAddress; + /** Proxy's last updated time in ms. */ + private long mLastHeartbeatTimeMs; + /** Proxy's start time in ms. */ + private long mStartTimeMs = 0; + /** Proxy's version. */ + private String mVersion = ""; + /** Proxy's revision. */ + private String mRevision = ""; + + /** + * Creates a new instance of {@link ProxyInfo}. 
+ * + * @param address the proxy address to use + */ + public ProxyInfo(NetAddress address) { + mAddress = Preconditions.checkNotNull(address, "address"); + mLastHeartbeatTimeMs = CommonUtils.getCurrentMs(); + } + + /** + * @return the proxy's address + */ + public NetAddress getAddress() { + return mAddress; + } + + /** + * @return the last updated time of the proxy in ms + */ + public long getLastHeartbeatTimeMs() { + return mLastHeartbeatTimeMs; + } + + /** + * @return the start time of the proxy in ms + */ + public long getStartTimeMs() { + return mStartTimeMs; + } + + /** + * @return the version of the proxy + */ + public String getVersion() { + return mVersion; + } + + /** + * @return the revision of the proxy + */ + public String getRevision() { + return mRevision; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("address", mAddress) + .add("lastHeartbeatTimeMs", mLastHeartbeatTimeMs) + .add("startTimeMs", mStartTimeMs) + .add("version", mVersion) + .add("revision", mRevision).toString(); + } + + /** + * @param startTimeMs the start time of the proxy in ms + */ + public void setStartTimeMs(long startTimeMs) { + mStartTimeMs = startTimeMs; + } + + /** + * @param version the version of the proxy + */ + public void setVersion(String version) { + mVersion = version; + } + + /** + * @param revision the revision of the proxy + */ + public void setRevision(String revision) { + mRevision = revision; + } + + /** + * Updates the last updated time of the proxy in ms. + */ + public void updateLastHeartbeatTimeMs() { + mLastHeartbeatTimeMs = CommonUtils.getCurrentMs(); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/meta/RetryHandlingMetaMasterMasterClient.java b/core/server/master/src/main/java/alluxio/master/meta/RetryHandlingMetaMasterMasterClient.java index 3f155c9185b5..44bde127afe1 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/RetryHandlingMetaMasterMasterClient.java +++ b/core/server/master/src/main/java/alluxio/master/meta/RetryHandlingMetaMasterMasterClient.java @@ -13,8 +13,10 @@ import alluxio.AbstractMasterClient; import alluxio.Constants; +import alluxio.ProjectConstants; import alluxio.grpc.ConfigProperty; import alluxio.grpc.GetMasterIdPRequest; +import alluxio.grpc.MasterHeartbeatPOptions; import alluxio.grpc.MasterHeartbeatPRequest; import alluxio.grpc.MetaCommand; import alluxio.grpc.MetaMasterMasterServiceGrpc; @@ -22,13 +24,17 @@ import alluxio.grpc.RegisterMasterPRequest; import alluxio.grpc.ServiceType; import alluxio.master.MasterClientContext; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; import alluxio.wire.Address; +import com.codahale.metrics.Gauge; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.List; +import java.util.Map; import javax.annotation.concurrent.ThreadSafe; /** @@ -90,8 +96,22 @@ public long getId(final Address address) throws IOException { * @return whether this master should re-register */ public MetaCommand heartbeat(final long masterId) throws IOException { + final Map gauges = MetricsSystem.METRIC_REGISTRY.getGauges(); + Gauge lastCheckpointGauge = gauges + .get(MetricKey.MASTER_JOURNAL_LAST_CHECKPOINT_TIME.getName()); + Gauge journalEntriesGauge = gauges + .get(MetricKey.MASTER_JOURNAL_ENTRIES_SINCE_CHECKPOINT.getName()); + MasterHeartbeatPOptions.Builder optionsBuilder = MasterHeartbeatPOptions.newBuilder(); + if (lastCheckpointGauge != null) { + 
optionsBuilder.setLastCheckpointTime((long) lastCheckpointGauge.getValue()); + } + if (journalEntriesGauge != null) { + optionsBuilder.setJournalEntriesSinceCheckpoint((long) journalEntriesGauge.getValue()); + } + return retryRPC(() -> mClient - .masterHeartbeat(MasterHeartbeatPRequest.newBuilder().setMasterId(masterId).build()) + .masterHeartbeat(MasterHeartbeatPRequest.newBuilder().setMasterId(masterId) + .setOptions(optionsBuilder).build()) .getCommand(), LOG, "Heartbeat", "masterId=%d", masterId); } @@ -103,10 +123,22 @@ public MetaCommand heartbeat(final long masterId) throws IOException { */ public void register(final long masterId, final List<ConfigProperty> configList) throws IOException { + final Map<String, Gauge> gauges = MetricsSystem.METRIC_REGISTRY.getGauges(); + RegisterMasterPOptions.Builder optionsBuilder = RegisterMasterPOptions.newBuilder() + .addAllConfigs(configList) + .setVersion(ProjectConstants.VERSION) + .setRevision(ProjectConstants.REVISION); + Gauge startTimeGauge = gauges.get(MetricKey.MASTER_START_TIME.getName()); + if (startTimeGauge != null) { + optionsBuilder.setStartTimeMs((long) startTimeGauge.getValue()); + } + Gauge lastLosePrimacyGauge = gauges.get(MetricKey.MASTER_LAST_LOSE_PRIMACY_TIME.getName()); + if (lastLosePrimacyGauge != null) { + optionsBuilder.setLosePrimacyTimeMs((long) lastLosePrimacyGauge.getValue()); + } retryRPC(() -> { mClient.registerMaster(RegisterMasterPRequest.newBuilder().setMasterId(masterId) - .setOptions(RegisterMasterPOptions.newBuilder().addAllConfigs(configList).build()) - .build()); + .setOptions(optionsBuilder).build()); return null; }, LOG, "Register", "masterId=%d,configList=%s", masterId, configList); } diff --git a/core/server/master/src/main/java/alluxio/master/meta/UpdateChecker.java b/core/server/master/src/main/java/alluxio/master/meta/UpdateChecker.java index d7d75f837014..7bfdfb6e77c2 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/UpdateChecker.java +++ b/core/server/master/src/main/java/alluxio/master/meta/UpdateChecker.java @@ -45,7 +45,7 @@ public UpdateChecker(DefaultMetaMaster metaMaster) { * Heartbeat for the periodic update check. */ @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { try { List<String> additionalInfo = new ArrayList<>(); int clusterSize = mMetaMaster.getWorkerAddresses().size(); diff --git a/core/server/master/src/main/java/alluxio/master/metastore/InodeStore.java b/core/server/master/src/main/java/alluxio/master/metastore/InodeStore.java index bd5f34e4b9b4..c6991c12cfd8 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/InodeStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/InodeStore.java @@ -13,10 +13,14 @@ import alluxio.master.file.meta.Inode; import alluxio.master.file.meta.InodeLockManager; +import alluxio.master.file.meta.InodeTree; import alluxio.master.file.meta.InodeView; import alluxio.master.file.meta.MutableInode; import alluxio.master.journal.checkpoint.Checkpointed; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.Closeable; import java.util.Optional; import java.util.function.Function; @@ -37,6 +41,8 @@ */ @ThreadSafe public interface InodeStore extends ReadOnlyInodeStore, Checkpointed, Closeable { + Logger LOG = LoggerFactory.getLogger(InodeStore.class); + /** * Gets a mutable representation of the specified inode. 
* @@ -178,6 +184,39 @@ default void addChild(long parentId, InodeView child) { @Override default void close() {} + /** + * Traverses the inode path starting from the given inode up to the root. Used for debugging. + * @param inode the leaf inode + * @return a string containing the proto of each inode on the path + */ + default String getInodePathString(InodeView inode) { + int iterationCount = 100; + try { + StringBuilder sb = new StringBuilder(); + InodeView currentInode = inode; + do { + sb.append('['); + sb.append(currentInode.toProto()); + sb.append("]<-"); + currentInode = get(currentInode.getParentId()).orElse(null); + if (currentInode == null) { + break; + } + iterationCount--; + } while (currentInode.getParentId() != InodeTree.NO_PARENT && iterationCount >= 0); + if (iterationCount < 0) { + sb.append("[Ignored]..."); + } else { + sb.append("[ROOT]"); + } + return sb.toString(); + } catch (Exception e) { + LOG.error("Traverse and print inode path failed, {}: {}", e.getClass().getName(), + e.getMessage()); + return "ERROR"; + } + } + /** * Used to perform batched writes. Call {@link #createWriteBatch()} to use batched writes. * diff --git a/core/server/master/src/main/java/alluxio/master/metastore/ReadOnlyInodeStore.java b/core/server/master/src/main/java/alluxio/master/metastore/ReadOnlyInodeStore.java index 191e90da720c..37795632c962 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/ReadOnlyInodeStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/ReadOnlyInodeStore.java @@ -11,15 +11,24 @@ package alluxio.master.metastore; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.file.options.DescendantType; import alluxio.master.file.meta.EdgeEntry; import alluxio.master.file.meta.Inode; import alluxio.master.file.meta.InodeDirectoryView; +import alluxio.master.file.meta.InodeIterationResult; +import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.LockedInodePath; import alluxio.master.file.meta.MutableInode; import alluxio.resource.CloseableIterator; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import java.io.Closeable; +import java.io.IOException; import java.util.Iterator; import java.util.NoSuchElementException; import java.util.Optional; @@ -182,6 +191,122 @@ default CloseableIterator getChildren(InodeDirectoryView inode) return getChildren(inode.getId(), ReadOption.defaults()); } + /** + * Creates an iterator starting from the path, including its + * children. 
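Before the implementation below, a usage sketch of the contract: the caller walks the tree depth-first and can prune a subtree right after seeing its root, and skipChildrenOfTheCurrent() must run before the next hasNext() call, as the RecursiveInodeIterator later in this diff enforces. Here inodeStore, lockedPath, and shouldPrune are illustrative placeholders:

try (SkippableInodeIterator it = inodeStore.getSkippableChildrenIterator(
    ReadOption.defaults(), DescendantType.ALL, true, lockedPath)) {
  while (it.hasNext()) {
    InodeIterationResult result = it.next();
    if (shouldPrune(result)) {
      // Do not descend into the directory just returned; continue with its siblings.
      it.skipChildrenOfTheCurrent();
    }
  }
}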
+ * @param option the read option + * @param descendantType the type of descendants to load + * @param includeBaseInode if the iterator should include the inode from the base path + * @param lockedPath the locked path to the root inode + * @return a skippable iterator that supports to skip children during the iteration + */ + default SkippableInodeIterator getSkippableChildrenIterator( + ReadOption option, DescendantType descendantType, boolean includeBaseInode, + LockedInodePath lockedPath) { + Inode inode; + try { + inode = lockedPath.getInode(); + } catch (FileDoesNotExistException e) { + return new SkippableInodeIterator() { + @Override + public void skipChildrenOfTheCurrent() { + } + + @Override + public void close() { + } + + @Override + public boolean hasNext() { + return false; + } + + @Override + public InodeIterationResult next() { + throw new NoSuchElementException(); + } + }; + } + if (descendantType == DescendantType.ALL) { + return new RecursiveInodeIterator(this, inode, includeBaseInode, option, lockedPath); + } else if (descendantType == DescendantType.NONE) { + Preconditions.checkState(includeBaseInode); + // if descendant type is none, we should only return the parent node + return new SkippableInodeIterator() { + InodeIterationResult mFirst = new InodeIterationResult(inode, lockedPath); + @Override + public void close() { + } + + @Override + public void skipChildrenOfTheCurrent() { + } + + @Override + public boolean hasNext() { + return mFirst != null; + } + + @Override + public InodeIterationResult next() { + if (mFirst == null) { + throw new NoSuchElementException(); + } + InodeIterationResult ret = mFirst; + mFirst = null; + return ret; + } + }; + } + + final CloseableIterator iterator = getChildren(inode.getId(), option); + return new SkippableInodeIterator() { + + LockedInodePath mPreviousPath = null; + final LockedInodePath mRootPath = lockedPath; + Inode mFirst = includeBaseInode ? inode : null; + + @Override + public void skipChildrenOfTheCurrent() { + // No-op + } + + @Override + public boolean hasNext() { + return mFirst != null || iterator.hasNext(); + } + + @Override + public InodeIterationResult next() { + if (mFirst != null) { + Inode ret = mFirst; + mFirst = null; + return new InodeIterationResult(ret, lockedPath); + } + if (mPreviousPath != null) { + mPreviousPath.close(); + } + Inode inode = iterator.next(); + + try { + mPreviousPath = mRootPath.lockChild(inode, InodeTree.LockPattern.WRITE_EDGE, false); + } catch (InvalidPathException e) { + // Should not reach here since the path should be valid + throw new InternalRuntimeException(e); + } + return new InodeIterationResult(inode, mPreviousPath); + } + + @Override + public void close() throws IOException { + iterator.close(); + if (mPreviousPath != null) { + mPreviousPath.close(); + } + } + }; + } + /** * @param inodeId an inode id * @param name an inode name diff --git a/core/server/master/src/main/java/alluxio/master/metastore/RecursiveInodeIterator.java b/core/server/master/src/main/java/alluxio/master/metastore/RecursiveInodeIterator.java new file mode 100644 index 000000000000..76d7cfa78a97 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/metastore/RecursiveInodeIterator.java @@ -0,0 +1,233 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore; + +import alluxio.AlluxioURI; +import alluxio.collections.Pair; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.master.file.meta.Inode; +import alluxio.master.file.meta.InodeIterationResult; +import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.LockedInodePath; +import alluxio.resource.CloseableIterator; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Stack; +import java.util.function.Function; +import javax.annotation.Nullable; + +/** + * A recursive inode iterator that supports skipping child inodes during iteration. + */ +public class RecursiveInodeIterator implements SkippableInodeIterator { + private static final Logger LOG = LoggerFactory.getLogger(RecursiveInodeIterator.class); + + private final Stack<Pair<CloseableIterator<? extends Inode>, LockedInodePath>> + mIteratorStack = new Stack<>(); + private final ReadOnlyInodeStore mInodeStore; + private boolean mHasNextCalled = false; + private boolean mHasNext; + private final List<String> mNameComponents = new ArrayList<>(); + private final List<String> mStartAfterPathComponents; + private LockedInodePath mLastLockedPath = null; + private Inode mFirst; + private final LockedInodePath mRootPath; + private boolean mCurrentInodeDirectory; + + /** + * Constructs an instance. + * + * @param inodeStore the inode store + * @param inode the root inode + * @param includeBaseInode if the inode of the base path should be included + * @param readOption the read option + * @param lockedPath the locked path to the root inode + */ + public RecursiveInodeIterator( + ReadOnlyInodeStore inodeStore, + Inode inode, + boolean includeBaseInode, + ReadOption readOption, + LockedInodePath lockedPath + ) { + mFirst = includeBaseInode ? inode : null; + mRootPath = lockedPath; + String startFrom = readOption.getStartFrom(); + if (startFrom == null) { + mStartAfterPathComponents = Collections.emptyList(); + } else { + try { + startFrom = readOption.getStartFrom().startsWith(AlluxioURI.SEPARATOR) + ? readOption.getStartFrom().substring(1) : readOption.getStartFrom(); + mStartAfterPathComponents = Arrays.asList(startFrom + .split(AlluxioURI.SEPARATOR)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + ReadOption firstReadOption; + if (mStartAfterPathComponents.size() > 0) { + firstReadOption = + ReadOption.newBuilder().setReadFrom(mStartAfterPathComponents.get(0)).build(); + } else { + firstReadOption = ReadOption.defaults(); + } + mIteratorStack.push(new Pair<>(inodeStore.getChildren( + inode.getId(), firstReadOption), lockedPath)); + mInodeStore = inodeStore; + } + + // The locked inode path will become stale after skipChildrenOfTheCurrent() is called. + @Override + public void skipChildrenOfTheCurrent() { + if (mHasNextCalled) { + throw new IllegalStateException("Cannot skip children after hasNext() has been called"); + } + if (!mCurrentInodeDirectory) { + // If the current inode is a file, then this is just a no-op. 
+ return; + } + popStack(); + if (mNameComponents.size() > 0) { + mNameComponents.remove(mNameComponents.size() - 1); + } + } + + private void popStack() { + Pair<CloseableIterator<? extends Inode>, LockedInodePath> item = mIteratorStack.pop(); + item.getFirst().close(); + if (!mIteratorStack.isEmpty()) { + item.getSecond().close(); + } + } + + @Override + public boolean hasNext() { + if (mFirst != null) { + return true; + } + if (mHasNextCalled) { + return mHasNext; + } + while (!mIteratorStack.isEmpty() && !tryOnIterator( + mIteratorStack.peek().getFirst(), CloseableIterator::hasNext + )) { + popStack(); + // When the iteration finishes, the size of mNameComponents is 0 + if (mNameComponents.size() > 0) { + mNameComponents.remove(mNameComponents.size() - 1); + } + } + mHasNextCalled = true; + mHasNext = !mIteratorStack.isEmpty(); + return mHasNext; + } + + @Override + public InodeIterationResult next() { + if (!hasNext()) { + throw new InternalRuntimeException("Called next on a completed iterator"); + } + if (mFirst != null) { + Inode ret = mFirst; + mFirst = null; + mCurrentInodeDirectory = ret.isDirectory(); + return new InodeIterationResult(ret, mRootPath); + } + Pair<CloseableIterator<? extends Inode>, LockedInodePath> top = mIteratorStack.peek(); + try { + top.getSecond().traverse(); + } catch (InvalidPathException e) { + // should not reach here as the path is valid + throw new InternalRuntimeException(e); + } + if (mLastLockedPath != null) { + mLastLockedPath.close(); + mLastLockedPath = null; + } else { + if (top.getSecond().getLockPattern() != InodeTree.LockPattern.READ) { + // after the parent has been returned, we can downgrade it to a read lock + top.getSecond().downgradeToRead(); + } + } + Inode current = tryOnIterator(top.getFirst(), CloseableIterator::next); + LockedInodePath lockedPath; + try { + lockedPath = top.getSecond().lockChild(current, InodeTree.LockPattern.WRITE_EDGE, false); + } catch (InvalidPathException e) { + // should not reach here as the path is valid + throw new InternalRuntimeException(e); + } + if (current.isDirectory()) { + ReadOption readOption = ReadOption.newBuilder() + .setReadFrom(populateStartAfter(current.getName())).build(); + CloseableIterator<? extends Inode> nextLevelIterator = + mInodeStore.getChildren(current.getId(), readOption); + mIteratorStack.push(new Pair<>(nextLevelIterator, lockedPath)); + mNameComponents.add(current.getName()); + } else { + mLastLockedPath = lockedPath; + } + mHasNextCalled = false; + mCurrentInodeDirectory = current.isDirectory(); + return new InodeIterationResult(current, lockedPath); + } + + /** + * @param currentInodeName the current inode name + * @return the startAfter string that is used when getChildren is called + */ + private @Nullable String populateStartAfter(String currentInodeName) { + if (mNameComponents.size() + 1 >= mStartAfterPathComponents.size()) { + return null; + } + for (int i = 0; i < mNameComponents.size(); ++i) { + if (!mNameComponents.get(i).equals(mStartAfterPathComponents.get(i))) { + return null; + } + } + if (!currentInodeName.equals(mStartAfterPathComponents.get(mNameComponents.size()))) { + return null; + } + return mStartAfterPathComponents.get(mNameComponents.size() + 1); + } + + private <T> T tryOnIterator( + CloseableIterator<? extends Inode> iterator, + Function<CloseableIterator<? extends Inode>, T> supplier) { + try { + return supplier.apply(iterator); + } catch (Exception e) { + iterator.close(); + throw e; + } + } + + @Override + public void close() throws IOException { + if (mLastLockedPath != null) { + mLastLockedPath.close(); + mLastLockedPath = null; + } + while (!mIteratorStack.isEmpty()) { + popStack(); + 
} +} diff --git a/core/server/master/src/main/java/alluxio/master/metastore/SkippableInodeIterator.java b/core/server/master/src/main/java/alluxio/master/metastore/SkippableInodeIterator.java new file mode 100644 index 000000000000..d5d14215e0b1 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/metastore/SkippableInodeIterator.java @@ -0,0 +1,30 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore; + +import alluxio.master.file.meta.InodeIterationResult; + +import java.io.Closeable; +import java.util.Iterator; + +/** + * Iterator over inodes that allows skipping a directory's children while iterating. + */ +public interface SkippableInodeIterator + extends Iterator<InodeIterationResult>, Closeable { + /** + * Skips the children of the current inode during the iteration. + */ + default void skipChildrenOfTheCurrent() { + throw new UnsupportedOperationException("Operation not supported"); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/metastore/caching/Cache.java b/core/server/master/src/main/java/alluxio/master/metastore/caching/Cache.java index 5ecd4287a7e6..59f12dc7c977 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/caching/Cache.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/caching/Cache.java @@ -174,13 +174,31 @@ private Optional getSkipCache(K key) { /** * Writes a key/value pair to the cache. + * This method is similar to {@link #put(Object, Object)}, but with the additional information + * that the entry is known to be new. + * + * @param key the key + * @param value the value + */ + public void putNewEntry(K key, V value) { + putInternal(key, value, true); + } + + /** + * Writes a key/value pair to the cache. + * If it is known that the entry is new, prefer {@link #putNewEntry(Object, Object)}. * * @param key the key * @param value the value */ public void put(K key, V value) { + putInternal(key, value, false); + } + + private void putInternal(K key, V value, boolean isNewEntry) { mMap.compute(key, (k, entry) -> { - onPut(key, value); + V existingValue = entry == null ? null : entry.mValue; + onPut(key, existingValue, value, isNewEntry); if (entry == null && cacheIsFull()) { writeToBackingStore(key, value); return null; @@ -450,9 +468,11 @@ protected void onCacheRemove(K key) {} * Callback triggered whenever a new key/value pair is added by put(key, value). * * @param key the added key + * @param existingValue the current value if one exists, otherwise null * @param value the added value + * @param isNewKey a caller-supplied flag indicating whether the key is expected to be new */ - protected void onPut(K key, V value) {} + protected void onPut(K key, @Nullable V existingValue, V value, boolean isNewKey) {} /** * Callback triggered whenever a key is removed by remove(key). 
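The extended onPut callback above hands subclasses the previous value together with the writer's claim that the key is new; the inode caches later in this diff use exactly that to flag journal corruption when a supposedly new inode id already maps to a different name. A self-contained miniature of the pattern with illustrative names (this is not the Alluxio Cache class itself):

import java.util.concurrent.ConcurrentHashMap;

public class MiniCache<K, V> {
  private final ConcurrentHashMap<K, V> mMap = new ConcurrentHashMap<>();

  /** Writes an entry the caller believes does not exist yet. */
  public void putNewEntry(K key, V value) {
    putInternal(key, value, true);
  }

  /** Writes an entry that may or may not already exist. */
  public void put(K key, V value) {
    putInternal(key, value, false);
  }

  private void putInternal(K key, V value, boolean isNewEntry) {
    mMap.compute(key, (k, existing) -> {
      onPut(k, existing, value, isNewEntry);
      return value;
    });
  }

  protected void onPut(K key, V existingValue, V value, boolean isNewKey) {
    if (isNewKey && existingValue != null) {
      // A supposedly new key already had a value: surface it, as the inode caches do.
      System.err.printf("conflicting write for %s: %s -> %s%n", key, existingValue, value);
    }
  }
}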
diff --git a/core/server/master/src/main/java/alluxio/master/metastore/caching/CachingInodeStore.java b/core/server/master/src/main/java/alluxio/master/metastore/caching/CachingInodeStore.java index a0f947833c7e..614e5683c34a 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/caching/CachingInodeStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/caching/CachingInodeStore.java @@ -19,6 +19,8 @@ import alluxio.conf.AlluxioConfiguration; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.grpc.ErrorType; import alluxio.master.file.meta.Edge; import alluxio.master.file.meta.EdgeEntry; import alluxio.master.file.meta.Inode; @@ -47,10 +49,12 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import com.google.common.io.Closer; +import io.grpc.Status; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.Closeable; +import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.util.Collection; @@ -63,8 +67,10 @@ import java.util.Optional; import java.util.Set; import java.util.SortedMap; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -189,7 +195,7 @@ public void writeNewInode(MutableInode inode) { if (inode.isDirectory()) { mListingCache.addEmptyDirectory(inode.getId()); } - mInodeCache.put(inode.getId(), inode); + mInodeCache.putNewEntry(inode.getId(), inode); } @Override @@ -265,6 +271,24 @@ public CheckpointName getCheckpointName() { return CheckpointName.CACHING_INODE_STORE; } + @Override + public CompletableFuture<Void> writeToCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.runAsync(() -> { + LOG.info("Flushing inodes to backing store"); + try { + mInodeCache.flush(); + mEdgeCache.flush(); + } catch (InterruptedException e) { + throw new AlluxioRuntimeException(Status.INTERNAL, + String.format("Failed to take snapshot %s", getCheckpointName()), + null, ErrorType.Internal, false); + } + LOG.info("Finished flushing inodes to backing store"); + mBackingStore.writeToCheckpoint(directory, executorService).join(); + }, executorService); + } + @Override public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { LOG.info("Flushing inodes to backing store"); @@ -274,6 +298,18 @@ public void writeToCheckpoint(OutputStream output) throws IOException, Interrupt mBackingStore.writeToCheckpoint(output); } + @Override + public CompletableFuture<Void> restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.runAsync(() -> { + mInodeCache.clear(); + mEdgeCache.clear(); + mListingCache.clear(); + mBackingStore.restoreFromCheckpoint(directory, executorService).join(); + mBackingStoreEmpty = false; + }, executorService); + } + @Override public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { mInodeCache.clear(); @@ -299,6 +335,24 @@ public InodeCache(CacheConfiguration conf) { MetricKey.MASTER_INODE_CACHE_MISSES, MetricKey.MASTER_INODE_CACHE_SIZE); } + @Override + protected void onPut( + Long id, @Nullable MutableInode<?> existingInode, MutableInode<?> inode, boolean newEntry) { + 
if (newEntry && existingInode != null && inode != null + && !existingInode.getName().equals(inode.getName())) { + LOG.error( + "[InodeTreeCorruption] trying to write inode name {} id {}, parent id {}, " + + "but a different inode name {} id {} parent id {} already exists. " + + "Your journal files are probably corrupted!", + inode.getName(), inode.getId(), inode.getParentId(), + existingInode.getName(), existingInode.getId(), existingInode.getParentId()); + if (LOG.isDebugEnabled()) { + LOG.debug("[InodeTreeCorruption] Existing inode: {}, new written inode: {}", + getInodePathString(existingInode), getInodePathString(inode)); + } + } + } + @Override protected Optional<MutableInode<?>> load(Long id) { if (mBackingStoreEmpty) { @@ -511,7 +565,7 @@ protected void onCacheRemove(Edge edge) { } @Override - protected void onPut(Edge edge, Long childId) { + protected void onPut(Edge edge, Long ignored, Long childId, boolean newEntry) { mListingCache.addEdge(edge, childId); } diff --git a/core/server/master/src/main/java/alluxio/master/metastore/heap/HeapInodeStore.java b/core/server/master/src/main/java/alluxio/master/metastore/heap/HeapInodeStore.java index 97582ce2c990..1a30de940e74 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/heap/HeapInodeStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/heap/HeapInodeStore.java @@ -34,6 +34,8 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.OutputStream; @@ -56,6 +58,7 @@ */ @ThreadSafe public class HeapInodeStore implements InodeStore { + private static final Logger LOG = LoggerFactory.getLogger(HeapInodeStore.class); private final Map<Long, MutableInode<?>> mInodes = new ConcurrentHashMap<>(); // Map from inode id to ids of children of that inode. The inner maps are ordered by child name. private final TwoKeyConcurrentSortedMap<Long, String, Long, SortedMap<String, Long>> mEdges = @@ -78,6 +81,25 @@ public void remove(Long inodeId) { mInodes.remove(inodeId); } + @Override + public void writeNewInode(MutableInode<?> inode) { + mInodes.compute(inode.getId(), (k, existingInode) -> { + if (existingInode != null && !existingInode.getName().equals(inode.getName())) { + LOG.error( + "[InodeTreeCorruption] trying to write inode name {} id {}, parent id {}, " + + "but a different inode name {} id {} parent id {} already exists. " + + "Your journal files are probably corrupted!", + inode.getName(), inode.getId(), inode.getParentId(), + existingInode.getName(), existingInode.getId(), existingInode.getParentId()); + if (LOG.isDebugEnabled()) { + LOG.debug("[InodeTreeCorruption] Existing inode: {}, new written inode: {}", + getInodePathString(existingInode), getInodePathString(inode)); + } + } + return existingInode == null ? 
inode : existingInode; + }); + } + @Override public void writeInode(MutableInode inode) { mInodes.putIfAbsent(inode.getId(), inode); diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksBlockMetaStore.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksBlockMetaStore.java index d0f60304ca2e..a3b733767435 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksBlockMetaStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksBlockMetaStore.java @@ -15,6 +15,7 @@ import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.master.journal.checkpoint.CheckpointName; import alluxio.master.metastore.BlockMetaStore; import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; @@ -60,13 +61,17 @@ * Block store backed by RocksDB. */ @ThreadSafe -public class RocksBlockMetaStore implements BlockMetaStore { +public class RocksBlockMetaStore implements BlockMetaStore, RocksCheckpointed { private static final Logger LOG = LoggerFactory.getLogger(RocksBlockMetaStore.class); private static final String BLOCKS_DB_NAME = "blocks"; private static final String BLOCK_META_COLUMN = "block-meta"; private static final String BLOCK_LOCATIONS_COLUMN = "block-locations"; private static final String ROCKS_STORE_NAME = "BlockStore"; + /* + * Below 3 fields are created and managed by the external user class, + * no need to close in this class + */ // This is a field instead of a constant because it depends on the call to RocksDB.loadLibrary(). private final WriteOptions mDisableWAL; private final ReadOptions mIteratorOption; @@ -75,7 +80,9 @@ public class RocksBlockMetaStore implements BlockMetaStore { private final List mToClose = new ArrayList<>(); private final RocksStore mRocksStore; - // The handles are closed in RocksStore + /* + * The ColumnFamilyHandle instances are created and closed in RocksStore + */ private final AtomicReference mBlockMetaColumn = new AtomicReference<>(); private final AtomicReference mBlockLocationsColumn = new AtomicReference<>(); private final LongAdder mSize = new LongAdder(); @@ -89,11 +96,14 @@ public RocksBlockMetaStore(String baseDir) { RocksDB.loadLibrary(); // the rocksDB objects must be initialized after RocksDB.loadLibrary() is called mDisableWAL = new WriteOptions().setDisableWAL(true); + mToClose.add(mDisableWAL); mReadPrefixSameAsStart = new ReadOptions().setPrefixSameAsStart(true); + mToClose.add(mReadPrefixSameAsStart); mIteratorOption = new ReadOptions() .setReadaheadSize(Configuration.getBytes( PropertyKey.MASTER_METASTORE_ITERATOR_READAHEAD_SIZE)) .setTotalOrderSeek(true); + mToClose.add(mIteratorOption); List columns = new ArrayList<>(); DBOptions opts = new DBOptions(); @@ -119,16 +129,20 @@ && new String(columns.get(2).getName()).equals(BLOCK_LOCATIONS_COLUMN), .setCreateMissingColumnFamilies(true) .setCreateIfMissing(true) .setMaxOpenFiles(-1); + // This is a field instead of a constant as it depends on the call to RocksDB.loadLibrary(). 
+ CompressionType compressionType = + Configuration.getEnum(PropertyKey.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_TYPE, + CompressionType.class); columns.add(new ColumnFamilyDescriptor(BLOCK_META_COLUMN.getBytes(), new ColumnFamilyOptions() .useFixedLengthPrefixExtractor(Longs.BYTES) // allows memtable buckets by block id .setMemTableConfig(new HashLinkedListMemTableConfig()) // bucket contains single value - .setCompressionType(CompressionType.NO_COMPRESSION))); + .setCompressionType(compressionType))); columns.add(new ColumnFamilyDescriptor(BLOCK_LOCATIONS_COLUMN.getBytes(), new ColumnFamilyOptions() .useFixedLengthPrefixExtractor(Longs.BYTES) // allows memtable buckets by block id .setMemTableConfig(new HashLinkedListMemTableConfig()) // bucket contains worker info - .setCompressionType(CompressionType.NO_COMPRESSION))); + .setCompressionType(compressionType))); } mToClose.addAll(columns.stream().map( @@ -261,7 +275,7 @@ && new String(columns.get(2).getName()).equals(BLOCK_LOCATIONS_COLUMN), } private long getProperty(String rocksPropertyName) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { return db().getAggregatedLongProperty(rocksPropertyName); } catch (RocksDBException e) { LOG.warn(String.format("error collecting %s", rocksPropertyName), e); @@ -272,7 +286,7 @@ private long getProperty(String rocksPropertyName) { @Override public Optional getBlock(long id) { byte[] meta; - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { meta = db().get(mBlockMetaColumn.get(), Longs.toByteArray(id)); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -289,7 +303,7 @@ public Optional getBlock(long id) { @Override public void putBlock(long id, BlockMeta meta) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { byte[] buf = db().get(mBlockMetaColumn.get(), Longs.toByteArray(id)); // Overwrites the key if it already exists. db().put(mBlockMetaColumn.get(), mDisableWAL, Longs.toByteArray(id), meta.toByteArray()); @@ -304,7 +318,7 @@ public void putBlock(long id, BlockMeta meta) { @Override public void removeBlock(long id) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { byte[] buf = db().get(mBlockMetaColumn.get(), Longs.toByteArray(id)); db().delete(mBlockMetaColumn.get(), mDisableWAL, Longs.toByteArray(id)); if (buf != null) { @@ -318,8 +332,14 @@ public void removeBlock(long id) { @Override public void clear() { - mSize.reset(); - mRocksStore.clear(); + LOG.info("Waiting to clear RocksBlockMetaStore.."); + try (RocksExclusiveLockHandle lock = mRocksStore.lockForRewrite()) { + LOG.info("Clearing RocksDB"); + mSize.reset(); + mRocksStore.clear(); + } + // Reset the DB state and prepare to serve again + LOG.info("RocksBlockMetaStore cleared and ready to serve again"); } @Override @@ -328,17 +348,23 @@ public long size() { } @Override + /** + * There may be concurrent readers and writers so we have to guarantee thread safety when + * closing the RocksDB and all RocksObject instances. The sequence for closing is: + * 1. Mark flag mClosed = true without locking. + * All new readers/writers should see the flag and not start the operation. + * 2. Acquire the WriteLock before shutting down, so it waits for all concurrent r/w to + * bail or finish. 
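A hedged sketch of the discipline described above, using the handle types and RocksStore methods that appear elsewhere in this diff; the enclosing methods, column handle, and key are illustrative only:

// Reader path: one shared lock per RocksDB operation; acquisition fails fast once
// the store is flagged as closing, so readers never race the shutdown.
byte[] get(ColumnFamilyHandle column, byte[] key) throws RocksDBException {
  try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) {
    return db().get(column, key);
  }
}

// Closing path: the exclusive lock is granted only after in-flight readers and
// writers have drained, so the RocksObject instances can be closed safely.
void shutdown() {
  try (RocksExclusiveLockHandle lock = mRocksStore.lockForClosing()) {
    mRocksStore.close();
  }
}

Holding the shared lock in a try-with-resources block bounds each reader to a single RocksDB call, which keeps the wait for the exclusive lock in the closing path short.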
+ */ public void close() { - mSize.reset(); - LOG.info("Closing RocksBlockStore and recycling all RocksDB JNI objects"); - mRocksStore.close(); - mIteratorOption.close(); - mDisableWAL.close(); - mReadPrefixSameAsStart.close(); - // Close the elements in the reverse order they were added - Collections.reverse(mToClose); - mToClose.forEach(RocksObject::close); - LOG.info("RocksBlockStore closed"); + LOG.info("RocksBlockStore is being closed"); + try (RocksExclusiveLockHandle lock = mRocksStore.lockForClosing()) { + mSize.reset(); + mRocksStore.close(); + // Close the elements in the reverse order they were added + Collections.reverse(mToClose); + mToClose.forEach(RocksObject::close); + } } @Override @@ -350,8 +376,11 @@ public List getLocations(long id) { // When there are multiple resources declared in the try-with-resource block // They are closed in the opposite order of declaration // Ref: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html - try (final RocksIterator iter = db().newIterator(mBlockLocationsColumn.get(), - mReadPrefixSameAsStart)) { + // We assume this operation is short (one block cannot have too many locations) + // and lock the full iteration + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock(); + final RocksIterator iter = db().newIterator(mBlockLocationsColumn.get(), + mReadPrefixSameAsStart)) { iter.seek(Longs.toByteArray(id)); List locations = new ArrayList<>(); for (; iter.isValid(); iter.next()) { @@ -368,7 +397,7 @@ public List getLocations(long id) { @Override public void addLocation(long id, BlockLocation location) { byte[] key = RocksUtils.toByteArray(id, location.getWorkerId()); - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { db().put(mBlockLocationsColumn.get(), mDisableWAL, key, location.toByteArray()); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -378,7 +407,7 @@ public void addLocation(long id, BlockLocation location) { @Override public void removeLocation(long blockId, long workerId) { byte[] key = RocksUtils.toByteArray(blockId, workerId); - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { db().delete(mBlockLocationsColumn.get(), mDisableWAL, key); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -386,13 +415,41 @@ public void removeLocation(long blockId, long workerId) { } @Override + /** + * Acquires an iterator to iterate all Blocks in RocksDB. + * A shared lock will be acquired when this iterator is created, and released when: + * 1. This iterator is complete. + * 2. At each step, the iterator finds the RocksDB is closing and aborts voluntarily. + * + * This iterator is used in: + * 1. {@link BlockIntegrityChecker} to iterate all existing blocks + * 2. 
Journal dumping like checkpoint/backup sequences + */ public CloseableIterator getCloseableIterator() { - RocksIterator iterator = db().newIterator(mBlockMetaColumn.get(), mIteratorOption); - return RocksUtils.createCloseableIterator(iterator, - (iter) -> new Block(Longs.fromByteArray(iter.key()), BlockMeta.parseFrom(iter.value()))); + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { + RocksSharedLockHandle readLock = mRocksStore.checkAndAcquireSharedLock(); + + RocksIterator iterator = db().newIterator(mBlockMetaColumn.get(), mIteratorOption); + return RocksUtils.createCloseableIterator(iterator, + (iter) -> new Block(Longs.fromByteArray(iter.key()), BlockMeta.parseFrom(iter.value())), + () -> { + mRocksStore.shouldAbort(lock.getLockVersion()); + return null; + }, readLock); + } } private RocksDB db() { return mRocksStore.getDb(); } + + @Override + public RocksStore getRocksStore() { + return mRocksStore; + } + + @Override + public CheckpointName getCheckpointName() { + return CheckpointName.BLOCK_MASTER; + } } diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksCheckpointed.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksCheckpointed.java new file mode 100644 index 000000000000..959dc13963c8 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksCheckpointed.java @@ -0,0 +1,86 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore.rocks; + +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.grpc.ErrorType; +import alluxio.master.journal.checkpoint.CheckpointInputStream; +import alluxio.master.journal.checkpoint.Checkpointed; + +import io.grpc.Status; +import org.rocksdb.RocksDBException; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; + +/** + * Provides default implementations for checkpointing RocksDB databases. 
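As a usage sketch for the interface declared just below: an implementer only supplies its RocksStore and a checkpoint name, and inherits the four default checkpoint methods. The class here is hypothetical, and the checkpoint name is reused purely for illustration:

```java
import alluxio.master.journal.checkpoint.CheckpointName;
import alluxio.master.metastore.rocks.RocksCheckpointed;
import alluxio.master.metastore.rocks.RocksStore;

/** Hypothetical store that inherits the default checkpoint read/write logic. */
public class MyRocksBackedStore implements RocksCheckpointed {
  private final RocksStore mRocksStore;

  public MyRocksBackedStore(RocksStore rocksStore) {
    mRocksStore = rocksStore;
  }

  @Override
  public RocksStore getRocksStore() {
    return mRocksStore; // the defaults lock this store for checkpoint/rewrite
  }

  @Override
  public CheckpointName getCheckpointName() {
    return CheckpointName.BLOCK_MASTER; // an existing name, for illustration only
  }
}
```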
+ */ +public interface RocksCheckpointed extends Checkpointed { + /** + * @return the {@link RocksStore} that will produce a checkpoint + */ + RocksStore getRocksStore(); + + @Override + default CompletableFuture writeToCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.runAsync(() -> { + LOG.debug("taking {} snapshot started", getCheckpointName()); + try (RocksExclusiveLockHandle lock = getRocksStore().lockForCheckpoint()) { + File subDir = new File(directory, getCheckpointName().toString()); + try { + getRocksStore().writeToCheckpoint(subDir); + } catch (RocksDBException e) { + throw new AlluxioRuntimeException(Status.INTERNAL, + String.format("Failed to take snapshot %s in dir %s", getCheckpointName(), directory), + e, ErrorType.Internal, false); + } + LOG.debug("taking {} snapshot finished", getCheckpointName()); + } + }, executorService); + } + + @Override + default void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { + try (RocksExclusiveLockHandle lock = getRocksStore().lockForCheckpoint()) { + getRocksStore().writeToCheckpoint(output); + } + } + + @Override + default CompletableFuture restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.runAsync(() -> { + LOG.debug("loading {} snapshot started", getCheckpointName()); + File subDir = new File(directory, getCheckpointName().toString()); + try (RocksExclusiveLockHandle lock = getRocksStore().lockForRewrite()) { + getRocksStore().restoreFromCheckpoint(subDir); + } catch (Exception e) { + throw new AlluxioRuntimeException(Status.INTERNAL, + String.format("Failed to restore snapshot %s", getCheckpointName()), + e, ErrorType.Internal, false); + } + LOG.debug("loading {} snapshot finished", getCheckpointName()); + }, executorService); + } + + @Override + default void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { + try (RocksExclusiveLockHandle lock = getRocksStore().lockForRewrite()) { + getRocksStore().restoreFromCheckpoint(input); + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksInodeStore.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksInodeStore.java index 55a5110c0a1b..e785c67559a0 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksInodeStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksInodeStore.java @@ -21,7 +21,6 @@ import alluxio.master.file.meta.InodeDirectoryView; import alluxio.master.file.meta.InodeView; import alluxio.master.file.meta.MutableInode; -import alluxio.master.journal.checkpoint.CheckpointInputStream; import alluxio.master.journal.checkpoint.CheckpointName; import alluxio.master.metastore.InodeStore; import alluxio.master.metastore.ReadOption; @@ -52,8 +51,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -66,6 +63,7 @@ import java.util.Spliterators; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -76,13 +74,17 @@ * File store backed by RocksDB. 
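The RocksCheckpointed defaults above pair each operation with the matching exclusive lock: dumping a checkpoint keeps the DB contents (and version) intact, while restoring rewrites them. A condensed sketch of that pairing against the RocksStore API added later in this patch (helper name hypothetical):

```java
import java.io.File;

import alluxio.master.metastore.rocks.RocksExclusiveLockHandle;
import alluxio.master.metastore.rocks.RocksStore;

/** Hypothetical helper showing which exclusive lock guards which operation. */
public final class CheckpointLockPairing {
  static void checkpointThenRestore(RocksStore store, File dir) throws Exception {
    // Dumping a checkpoint does not change the DB, so lockForCheckpoint()
    // releases without bumping the version.
    try (RocksExclusiveLockHandle lock = store.lockForCheckpoint()) {
      store.writeToCheckpoint(dir);
    }
    // Restoring replaces the DB contents, so lockForRewrite() bumps the
    // version and forces slow readers of the old contents to abort.
    try (RocksExclusiveLockHandle lock = store.lockForRewrite()) {
      store.restoreFromCheckpoint(dir);
    }
  }
}
```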
*/ @ThreadSafe -public class RocksInodeStore implements InodeStore { +public class RocksInodeStore implements InodeStore, RocksCheckpointed { private static final Logger LOG = LoggerFactory.getLogger(RocksInodeStore.class); private static final String INODES_DB_NAME = "inodes"; private static final String INODES_COLUMN = "inodes"; private static final String EDGES_COLUMN = "edges"; private static final String ROCKS_STORE_NAME = "InodeStore"; + /* + * Below 3 fields are created and managed by the external user class, + * no need to close in this class. + */ // These are fields instead of constants because they depend on the call to RocksDB.loadLibrary(). private final WriteOptions mDisableWAL; private final ReadOptions mReadPrefixSameAsStart; @@ -96,6 +98,9 @@ public class RocksInodeStore implements InodeStore { private final RocksStore mRocksStore; private final List mToClose = new ArrayList<>(); + /* + * The ColumnFamilyHandle instances are created and closed in RocksStore + */ private final AtomicReference mInodesColumn = new AtomicReference<>(); private final AtomicReference mEdgesColumn = new AtomicReference<>(); @@ -108,10 +113,13 @@ public RocksInodeStore(String baseDir) { RocksDB.loadLibrary(); // the rocksDB objects must be initialized after RocksDB.loadLibrary() is called mDisableWAL = new WriteOptions().setDisableWAL(true); + mToClose.add(mDisableWAL); mReadPrefixSameAsStart = new ReadOptions().setPrefixSameAsStart(true); + mToClose.add(mReadPrefixSameAsStart); mIteratorOption = new ReadOptions().setReadaheadSize( Configuration.getBytes(PropertyKey.MASTER_METASTORE_ITERATOR_READAHEAD_SIZE)) .setTotalOrderSeek(true); + mToClose.add(mIteratorOption); String dbPath = PathUtils.concatPath(baseDir, INODES_DB_NAME); String backupPath = PathUtils.concatPath(baseDir, INODES_DB_NAME + "-backup"); @@ -138,16 +146,20 @@ && new String(columns.get(2).getName()).equals(EDGES_COLUMN), .setCreateMissingColumnFamilies(true) .setCreateIfMissing(true) .setMaxOpenFiles(-1); + // This is a field instead of a constant because it depends on RocksDB.loadLibrary(). 
+ CompressionType compressionType = + Configuration.getEnum(PropertyKey.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_TYPE, + CompressionType.class); columns.add(new ColumnFamilyDescriptor(INODES_COLUMN.getBytes(), new ColumnFamilyOptions() .useFixedLengthPrefixExtractor(Longs.BYTES) // allows memtable buckets by inode id .setMemTableConfig(new HashLinkedListMemTableConfig()) // bucket contains children ids - .setCompressionType(CompressionType.NO_COMPRESSION))); + .setCompressionType(compressionType))); columns.add(new ColumnFamilyDescriptor(EDGES_COLUMN.getBytes(), new ColumnFamilyOptions() .useFixedLengthPrefixExtractor(Longs.BYTES) // allows memtable buckets by inode id .setMemTableConfig(new HashLinkedListMemTableConfig()) // bucket only contains an id - .setCompressionType(CompressionType.NO_COMPRESSION))); + .setCompressionType(compressionType))); } mToClose.addAll(columns.stream().map( ColumnFamilyDescriptor::getOptions).collect(Collectors.toList())); @@ -276,7 +288,7 @@ && new String(columns.get(2).getName()).equals(EDGES_COLUMN), } private long getProperty(String rocksPropertyName) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { return db().getAggregatedLongProperty(rocksPropertyName); } catch (RocksDBException e) { LOG.warn(String.format("error collecting %s", rocksPropertyName), e); @@ -286,7 +298,7 @@ private long getProperty(String rocksPropertyName) { @Override public void remove(Long inodeId) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { byte[] id = Longs.toByteArray(inodeId); db().delete(mInodesColumn.get(), mDisableWAL, id); } catch (RocksDBException e) { @@ -296,7 +308,7 @@ public void remove(Long inodeId) { @Override public void writeInode(MutableInode<?> inode) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { db().put(mInodesColumn.get(), mDisableWAL, Longs.toByteArray(inode.getId()), inode.toProto().toByteArray()); } catch (RocksDBException e) { @@ -311,12 +323,18 @@ public WriteBatch createWriteBatch() { @Override public void clear() { - mRocksStore.clear(); + LOG.info("Waiting to clear RocksInodeStore..."); + try (RocksExclusiveLockHandle lock = mRocksStore.lockForRewrite()) { + LOG.info("Clearing RocksDB"); + mRocksStore.clear(); + } + // Reset the DB state and prepare to serve again + LOG.info("RocksInodeStore cleared and ready to serve again"); } @Override public void addChild(long parentId, String childName, Long childId) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { db().put(mEdgesColumn.get(), mDisableWAL, RocksUtils.toByteArray(parentId, childName), Longs.toByteArray(childId)); } catch (RocksDBException e) { @@ -326,7 +344,7 @@ public void addChild(long parentId, String childName, Long childId) { @Override public void removeChild(long parentId, String name) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { db().delete(mEdgesColumn.get(), mDisableWAL, RocksUtils.toByteArray(parentId, name)); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -336,7 +354,7 @@ public void removeChild(long parentId, String name) { @Override public Optional<MutableInode<?>> getMutable(long id, ReadOption option) { byte[] inode; - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { inode = db().get(mInodesColumn.get(), Longs.toByteArray(id)); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -353,37 +371,54 @@ public Optional<MutableInode<?>>
getMutable(long id, ReadOption option) { @Override public CloseableIterator getChildIds(Long inodeId, ReadOption option) { - RocksIterator iter = db().newIterator(mEdgesColumn.get(), mReadPrefixSameAsStart); - // first seek to the correct bucket - iter.seek(Longs.toByteArray(inodeId)); - // now seek to a specific file if needed - String prefix = option.getPrefix(); - String fromName = option.getStartFrom(); - String seekTo; - if (fromName != null && prefix != null) { - if (fromName.compareTo(prefix) > 0) { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { + RocksIterator iter = db().newIterator(mEdgesColumn.get(), mReadPrefixSameAsStart); + // first seek to the correct bucket + iter.seek(Longs.toByteArray(inodeId)); + // now seek to a specific file if needed + String prefix = option.getPrefix(); + String fromName = option.getStartFrom(); + String seekTo; + if (fromName != null && prefix != null) { + if (fromName.compareTo(prefix) > 0) { + seekTo = fromName; + } else { + seekTo = prefix; + } + } else if (fromName != null) { seekTo = fromName; } else { seekTo = prefix; } - } else if (fromName != null) { - seekTo = fromName; - } else { - seekTo = prefix; - } - if (seekTo != null && seekTo.length() > 0) { - iter.seek(RocksUtils.toByteArray(inodeId, seekTo)); + if (seekTo != null && seekTo.length() > 0) { + iter.seek(RocksUtils.toByteArray(inodeId, seekTo)); + } + /* + * Acquire a second lock for iteration, instead of using the same lock for initialization. + * Because init takes many operations and should be protected by try-with-resource. + * This is fine because the shared lock is reentrant. + */ + RocksSharedLockHandle readLock = mRocksStore.checkAndAcquireSharedLock(); + RocksIter rocksIter = new RocksIter(iter, prefix, () -> { + mRocksStore.shouldAbort(readLock.getLockVersion()); + return null; + }); + Stream idStream = StreamSupport.stream(Spliterators + .spliteratorUnknownSize(rocksIter, Spliterator.ORDERED), false); + return CloseableIterator.create(idStream.iterator(), (any) -> { + try { + iter.close(); + } finally { + readLock.close(); + } + }); } - RocksIter rocksIter = new RocksIter(iter, prefix); - Stream idStream = StreamSupport.stream(Spliterators - .spliteratorUnknownSize(rocksIter, Spliterator.ORDERED), false); - return CloseableIterator.create(idStream.iterator(), (any) -> iter.close()); } @Override public Optional getChildId(Long inodeId, String name, ReadOption option) { byte[] id; - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { id = db().get(mEdgesColumn.get(), RocksUtils.toByteArray(inodeId, name)); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -399,8 +434,10 @@ static class RocksIter implements Iterator { final RocksIterator mIter; boolean mStopped = false; final byte[] mPrefix; + Supplier mAbortCheck; - RocksIter(RocksIterator rocksIterator, @Nullable String prefix) { + RocksIter(RocksIterator rocksIterator, @Nullable String prefix, + Supplier abortCheck) { mIter = rocksIterator; if (prefix != null && prefix.length() > 0) { mPrefix = prefix.getBytes(); @@ -408,6 +445,7 @@ static class RocksIter implements Iterator { mPrefix = null; } checkPrefix(); + mAbortCheck = abortCheck; } private void checkPrefix() { @@ -433,6 +471,8 @@ public boolean hasNext() { @Override public Long next() { + // Abort the operation if RocksDB stops serving + mAbortCheck.get(); Long l = Longs.fromByteArray(mIter.value()); mIter.next(); checkPrefix(); @@ -442,6 +482,7 @@ public Long next() { @Override public 
Optional getChild(Long inodeId, String name, ReadOption option) { + // The underlying calls should each handle locking internally return getChildId(inodeId, name).flatMap(id -> { Optional child = get(id); if (!child.isPresent()) { @@ -454,7 +495,8 @@ public Optional getChild(Long inodeId, String name, ReadOption option) { @Override public boolean hasChildren(InodeDirectoryView inode, ReadOption option) { - try (RocksIterator iter = db().newIterator(mEdgesColumn.get(), mReadPrefixSameAsStart)) { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock(); + RocksIterator iter = db().newIterator(mEdgesColumn.get(), mReadPrefixSameAsStart)) { iter.seek(Longs.toByteArray(inode.getId())); return iter.isValid(); } @@ -463,10 +505,11 @@ public boolean hasChildren(InodeDirectoryView inode, ReadOption option) { @Override public Set allEdges() { Set edges = new HashSet<>(); - try (RocksIterator iter = db().newIterator(mEdgesColumn.get(), - mIteratorOption)) { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock(); + RocksIterator iter = db().newIterator(mEdgesColumn.get(), mIteratorOption)) { iter.seekToFirst(); while (iter.isValid()) { + mRocksStore.shouldAbort(lock.getLockVersion()); long parentId = RocksUtils.readLong(iter.key(), 0); String childName = new String(iter.key(), Longs.BYTES, iter.key().length - Longs.BYTES); long childId = Longs.fromByteArray(iter.value()); @@ -480,10 +523,11 @@ public Set allEdges() { @Override public Set> allInodes() { Set> inodes = new HashSet<>(); - try (RocksIterator iter = db().newIterator(mInodesColumn.get(), - mIteratorOption)) { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock(); + RocksIterator iter = db().newIterator(mInodesColumn.get(), mIteratorOption)) { iter.seekToFirst(); while (iter.isValid()) { + mRocksStore.shouldAbort(lock.getLockVersion()); inodes.add(getMutable(Longs.fromByteArray(iter.key()), ReadOption.defaults()).get()); iter.next(); } @@ -492,14 +536,28 @@ public Set> allInodes() { } /** - * The name is intentional, in order to distinguish from the {@code Iterable} interface. + * Acquires an iterator to iterate all Inodes in RocksDB. + * A shared lock will be acquired when this iterator is created, and released when: + * 1. This iterator is complete. + * 2. At each step, the iterator finds the RocksDB is closing and aborts voluntarily. + * + * Except tests, this iterator is only used in: + * 1. {@link alluxio.master.journal.tool.AbstractJournalDumper} which translates RocksDB + * checkpoints to a human-readable form. 
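Because the iterators handed out in this file hold a shared lock until closed, callers are expected to consume them under try-with-resources. A hypothetical consumer, assuming CloseableIterator exposes the standard Iterator methods as its uses elsewhere in this patch suggest:

```java
import alluxio.resource.CloseableIterator;

/** Hypothetical consumer: closing the iterator is what releases its shared lock. */
final class InodeCounter {
  static long countInodes(RocksInodeStore store) {
    try (CloseableIterator<?> it = store.getCloseableIterator()) {
      long count = 0;
      while (it.hasNext()) {
        it.next(); // may throw UnavailableRuntimeException if the RocksDB starts closing
        count++;
      }
      return count;
    }
  }
}
```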
* * @return an iterator over stored inodes */ public CloseableIterator getCloseableIterator() { - return RocksUtils.createCloseableIterator( - db().newIterator(mInodesColumn.get(), mIteratorOption), - (iter) -> getMutable(Longs.fromByteArray(iter.key()), ReadOption.defaults()).get()); + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { + RocksSharedLockHandle readLock = mRocksStore.checkAndAcquireSharedLock(); + return RocksUtils.createCloseableIterator( + db().newIterator(mInodesColumn.get(), mIteratorOption), + (iter) -> getMutable(Longs.fromByteArray(iter.key()), ReadOption.defaults()).get(), + () -> { + mRocksStore.shouldAbort(lock.getLockVersion()); + return null; + }, readLock); + } } @Override @@ -513,13 +571,8 @@ public CheckpointName getCheckpointName() { } @Override - public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { - mRocksStore.writeToCheckpoint(output); - } - - @Override - public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { - mRocksStore.restoreFromCheckpoint(input); + public RocksStore getRocksStore() { + return mRocksStore; } private class RocksWriteBatch implements WriteBatch { @@ -527,7 +580,7 @@ private class RocksWriteBatch implements WriteBatch { @Override public void writeInode(MutableInode inode) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { mBatch.put(mInodesColumn.get(), Longs.toByteArray(inode.getId()), inode.toProto().toByteArray()); } catch (RocksDBException e) { @@ -537,7 +590,7 @@ public void writeInode(MutableInode inode) { @Override public void removeInode(Long key) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { mBatch.delete(mInodesColumn.get(), Longs.toByteArray(key)); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -546,7 +599,7 @@ public void removeInode(Long key) { @Override public void addChild(Long parentId, String childName, Long childId) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { mBatch.put(mEdgesColumn.get(), RocksUtils.toByteArray(parentId, childName), Longs.toByteArray(childId)); } catch (RocksDBException e) { @@ -556,7 +609,7 @@ public void addChild(Long parentId, String childName, Long childId) { @Override public void removeChild(Long parentId, String childName) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { mBatch.delete(mEdgesColumn.get(), RocksUtils.toByteArray(parentId, childName)); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -565,7 +618,7 @@ public void removeChild(Long parentId, String childName) { @Override public void commit() { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { db().write(mDisableWAL, mBatch); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -580,14 +633,13 @@ public void close() { @Override public void close() { - LOG.info("Closing RocksInodeStore and recycling all RocksDB JNI objects"); - mRocksStore.close(); - mDisableWAL.close(); - mReadPrefixSameAsStart.close(); - // Close the elements in the reverse order they were added - Collections.reverse(mToClose); - mToClose.forEach(RocksObject::close); - LOG.info("RocksInodeStore closed"); + LOG.info("RocksInodeStore is being closed"); + try (RocksExclusiveLockHandle lock = mRocksStore.lockForClosing()) { + mRocksStore.close(); + // Close the elements in the reverse order they were added + 
Collections.reverse(mToClose); + mToClose.forEach(RocksObject::close); + } } private RocksDB db() { @@ -600,10 +652,12 @@ private RocksDB db() { */ public String toStringEntries() { StringBuilder sb = new StringBuilder(); - try (ReadOptions readOptions = new ReadOptions().setTotalOrderSeek(true); - RocksIterator inodeIter = db().newIterator(mInodesColumn.get(), readOptions)) { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock(); + ReadOptions readOptions = new ReadOptions().setTotalOrderSeek(true); + RocksIterator inodeIter = db().newIterator(mInodesColumn.get(), readOptions)) { inodeIter.seekToFirst(); while (inodeIter.isValid()) { + mRocksStore.shouldAbort(lock.getLockVersion()); MutableInode inode; try { inode = MutableInode.fromProto(InodeMeta.Inode.parseFrom(inodeIter.value())); @@ -615,9 +669,11 @@ public String toStringEntries() { inodeIter.next(); } } - try (RocksIterator edgeIter = db().newIterator(mEdgesColumn.get())) { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock(); + RocksIterator edgeIter = db().newIterator(mEdgesColumn.get())) { edgeIter.seekToFirst(); while (edgeIter.isValid()) { + mRocksStore.shouldAbort(lock.getLockVersion()); byte[] key = edgeIter.key(); byte[] id = new byte[Longs.BYTES]; byte[] name = new byte[key.length - Longs.BYTES]; @@ -633,6 +689,8 @@ public String toStringEntries() { /** * A testing only method to access the internal objects. + * For simplicity, no thread safety is provided on the escaping objects. + * * @return the RocksDB objects references the InodesColumn */ @VisibleForTesting diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java index 42b125d0ef6d..b7778af0dd40 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java @@ -14,14 +14,19 @@ import alluxio.Constants; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.exception.ExceptionMessage; +import alluxio.exception.runtime.UnavailableRuntimeException; import alluxio.master.journal.checkpoint.CheckpointInputStream; import alluxio.master.journal.checkpoint.CheckpointOutputStream; import alluxio.master.journal.checkpoint.CheckpointType; +import alluxio.retry.CountingRetry; import alluxio.retry.TimeoutRetry; -import alluxio.util.ParallelZipUtils; -import alluxio.util.TarUtils; +import alluxio.util.SleepUtils; +import alluxio.util.compression.ParallelZipUtils; +import alluxio.util.compression.TarUtils; import alluxio.util.io.FileUtils; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.commons.io.IOUtils; import org.rocksdb.BlockBasedTableConfig; @@ -47,36 +52,128 @@ import java.io.IOException; import java.io.OutputStream; import java.nio.file.Paths; +import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Optional; import java.util.UUID; +import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicReference; -import javax.annotation.concurrent.ThreadSafe; +import java.util.concurrent.atomic.AtomicStampedReference; +import java.util.concurrent.atomic.LongAdder; +import javax.annotation.concurrent.NotThreadSafe; /** * Class for managing a rocksdb database. 
This class handles common functionality such as * initializing the database and performing database backup/restore. * - * Thread safety is achieved by synchronizing all public methods. + * This class provides locking methods for the callers, and the thread safety of RocksDB + * relies on the callers using the corresponding lock methods. + * The reasons why this class only provides thread safety utilities to the callers + * (instead of wrapping them around each call) are: + * 1. Callers like RocksInodeStore and RocksBlockMetaStore have specific read/write logic + * like iteration, which cannot be abstracted and locked internally in this class. + * 2. With the locking methods provided by this class, callers like RocksInodeStore + * can reuse the locks to perform concurrency control on their own logic. + * + * For reading/writing on the RocksDB, use the shared lock + * <pre>
+ *   try (RocksSharedLockHandle r = mRocksStore.checkAndAcquireSharedLock()) {
+ *     // perform your read/write operation
+ *   }
+ * </pre>
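Expanded into a compilable form, the snippet reads as below (class and method names hypothetical; closing the handle just releases the shared lock):

```java
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.RocksDBException;

/** Expanded, compilable form of the javadoc snippet above. */
final class SharedLockRead {
  static byte[] readWithSharedLock(RocksStore store, ColumnFamilyHandle column, byte[] key)
      throws RocksDBException {
    // Throws UnavailableRuntimeException instead of blocking if the DB is stopping.
    try (RocksSharedLockHandle lock = store.checkAndAcquireSharedLock()) {
      return store.getDb().get(column, key);
    }
  }
}
```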
+ * + * For operations like closing/restart/restoring on the RocksDB, an exclusive lock should + * be acquired by calling one of: + * 1. {@link #lockForClosing()} + * 2. {@link #lockForRewrite()} + * 3. {@link #lockForCheckpoint()} + * + * Rule of thumb: + * 1. Use the proper locking methods when you access RocksDB. + * 2. Make each operation short. Make the locked section short. + * 3. If you have to make the operation long (like iteration), utilize {@link #shouldAbort(int)} + * to check and abort voluntarily. + * See the Javadoc on the locking methods for details. */ -@ThreadSafe +@NotThreadSafe public final class RocksStore implements Closeable { private static final Logger LOG = LoggerFactory.getLogger(RocksStore.class); public static final int ROCKS_OPEN_RETRY_TIMEOUT = 20 * Constants.SECOND_MS; + public static final Duration ROCKS_CLOSE_WAIT_TIMEOUT = + Configuration.getDuration(PropertyKey.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT); + private static final boolean TEST_MODE = Configuration.getBoolean(PropertyKey.TEST_MODE); + private final String mName; private final String mDbPath; private final String mDbCheckpointPath; private final Integer mParallelBackupPoolSize; + + private final int mCompressLevel = Configuration.getInt( + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL); + private final boolean mParallelBackup = Configuration.getBoolean( + PropertyKey.MASTER_METASTORE_ROCKS_PARALLEL_BACKUP); + + /* + * Below 2 fields are created and managed by the external user class, + * no need to close in this class. + */ private final Collection<ColumnFamilyDescriptor> mColumnFamilyDescriptors; private final DBOptions mDbOpts; - - private RocksDB mDb; - private Checkpoint mCheckpoint; - // When we create the database, we must set these handles. + /* + * Below 3 fields are created and managed internally to this class, + * must be closed in this class. + */ + private volatile RocksDB mDb; + private volatile Checkpoint mCheckpoint; private final List<AtomicReference<ColumnFamilyHandle>> mColumnHandles; + /* + * The state consists of two pieces of information. + * + * The boolean flag indicates whether the RocksDB wants to stop serving. + * TRUE - Stop serving + * FALSE - Serving normally + * + * The version number indicates whether the RocksDB has been rewritten. + * If the RocksDB is restored or wiped out, the version number goes up. + * If the RocksDB is paused just to dump a checkpoint, the version number is kept the same. + * A reader can rely on the version to tell whether it can still read the RocksDB + * after the exclusive lock is taken and released. + */ + public final AtomicStampedReference<Boolean> mRocksDbStopServing = + new AtomicStampedReference<>(false, 0); + public volatile LongAdder mRefCount = new LongAdder(); + + /* + * Normally, the ref count will still be zero when the exclusive lock is held because: + * 1. If the exclusive lock was not forced, that means the ref count had decremented to zero + * before the exclusive lock was taken, and while the exclusive lock was held, no readers + * were able to come in and increment the ref count. + * 2. If the exclusive lock was forced, the old ref count instance was thrown away, + * so even if there were a slow reader, it would not touch the new ref count incorrectly. + * Therefore, the new ref count should stay zero. + * + * However, we still added this sanity check as a canary for incorrect ref count usages.
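The (flag, version) pair described above maps directly onto java.util.concurrent.atomic.AtomicStampedReference. A self-contained demo of the two kinds of pauses, illustrative only:

```java
import java.util.concurrent.atomic.AtomicStampedReference;

/** Runnable demo of the flag-plus-version state machine described above. */
public class StopFlagDemo {
  public static void main(String[] args) {
    AtomicStampedReference<Boolean> state = new AtomicStampedReference<>(false, 0);

    // lockForCheckpoint-style pause: stop serving, then resume with the SAME stamp,
    // because the contents did not change.
    int v = state.getStamp();
    state.set(true, v);
    state.set(false, v);

    // lockForRewrite-style pause: resume with a bumped stamp, so a reader that
    // locked under version v knows the contents were replaced and must abort.
    v = state.getStamp();
    state.set(true, v);
    state.set(false, v + 1);

    System.out.println("serving=" + !state.getReference() + ", version=" + state.getStamp());
  }
}
```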
+ */ + private final Callable<Void> mCheckRefCount = () -> { + long refCount = getSharedLockCount(); + if (TEST_MODE) { + // In test mode we enforce a strict ref count check, as a canary for ref count issues + Preconditions.checkState(refCount == 0, + ExceptionMessage.ROCKS_DB_REF_COUNT_DIRTY.getMessage(refCount)); + } else { + // In a real deployment, we forgive potential ref count problems and take the risk + if (refCount != 0) { + LOG.warn(ExceptionMessage.ROCKS_DB_REF_COUNT_DIRTY.getMessage(refCount)); + } + resetRefCounter(); + } + return null; + }; + /** * @param name a name to distinguish what store this is * @param dbPath a path for the rocks database @@ -97,7 +194,8 @@ public RocksStore(String name, String dbPath, String checkpointPath, DBOptions d mColumnFamilyDescriptors = columnFamilyDescriptors; mDbOpts = dbOpts; mColumnHandles = columnHandles; - try { + LOG.info("Resetting RocksDB for {} on init", name); + try (RocksExclusiveLockHandle lock = lockForRewrite()) { resetDb(); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -105,17 +203,20 @@ public RocksStore(String name, String dbPath, String checkpointPath, DBOptions d } /** + * Requires the caller to acquire a shared lock by calling {@link #checkAndAcquireSharedLock()}. + * * @return the underlying rocksdb instance. The instance changes when clear() is called, so if the * caller caches the returned db, they must reset it after calling clear() */ - public synchronized RocksDB getDb() { + public RocksDB getDb() { return mDb; } /** * Clears and re-initializes the database. + * Requires the caller to acquire an exclusive lock by calling {@link #lockForRewrite()}. */ - public synchronized void clear() { + public void clear() { try { resetDb(); } catch (RocksDBException e) { @@ -190,12 +291,22 @@ private void createDb() throws RocksDBException { LOG.info("Opened rocks database under path {}", mDbPath); } + /** + * Writes a checkpoint under the specified directory. + * @param directory the directory that the checkpoint will be written under + * @throws RocksDBException if it encounters an error when writing the checkpoint + */ + public synchronized void writeToCheckpoint(File directory) throws RocksDBException { + mCheckpoint.createCheckpoint(directory.getPath()); + } + /** * Writes a checkpoint of the database's content to the given output stream. + * Requires the caller to acquire an exclusive lock by calling {@link #lockForCheckpoint()}.
* * @param output the stream to write to */ - public synchronized void writeToCheckpoint(OutputStream output) + public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { LOG.info("Creating rocksdb checkpoint at {}", mDbCheckpointPath); long startNano = System.nanoTime(); @@ -208,18 +319,16 @@ public synchronized void writeToCheckpoint(OutputStream output) throw new IOException(e); } - if (Configuration.getBoolean(PropertyKey.MASTER_METASTORE_ROCKS_PARALLEL_BACKUP)) { + if (mParallelBackup) { CheckpointOutputStream out = new CheckpointOutputStream(output, CheckpointType.ROCKS_PARALLEL); LOG.info("Checkpoint complete, compressing with {} threads", mParallelBackupPoolSize); - int compressLevel = Configuration.getInt( - PropertyKey.MASTER_METASTORE_ROCKS_PARALLEL_BACKUP_COMPRESSION_LEVEL); ParallelZipUtils.compress(Paths.get(mDbCheckpointPath), out, - mParallelBackupPoolSize, compressLevel); + mParallelBackupPoolSize, mCompressLevel); } else { CheckpointOutputStream out = new CheckpointOutputStream(output, CheckpointType.ROCKS_SINGLE); LOG.info("Checkpoint complete, compressing with one thread"); - TarUtils.writeTarGz(Paths.get(mDbCheckpointPath), out); + TarUtils.writeTarGz(Paths.get(mDbCheckpointPath), out, mCompressLevel); } LOG.info("Completed rocksdb checkpoint in {}ms", (System.nanoTime() - startNano) / 1_000_000); @@ -227,12 +336,31 @@ public synchronized void writeToCheckpoint(OutputStream output) FileUtils.deletePathRecursively(mDbCheckpointPath); } + /** + * Restores RocksDB state from a checkpoint at the provided location. Moves the directory to a + * permanent location, restores RocksDB state, and then immediately takes a new snapshot in the + * original location as replacement. + * @param directory where the checkpoint is located + * @throws RocksDBException if rocks encounters a problem + * @throws IOException if moving files around encounters a problem + */ + public synchronized void restoreFromCheckpoint(File directory) + throws RocksDBException, IOException { + stopDb(); + File dbPath = new File(mDbPath); + org.apache.commons.io.FileUtils.deleteDirectory(dbPath); + org.apache.commons.io.FileUtils.moveDirectory(directory, dbPath); + createDb(); + writeToCheckpoint(directory); + } + /** * Restores the database from a checkpoint. + * Requires the caller to acquire an exclusive lock by calling {@link #lockForRewrite()}. 
* * @param input the checkpoint stream to restore from */ - public synchronized void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { + public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { LOG.info("Restoring rocksdb from checkpoint"); long startNano = System.nanoTime(); Preconditions.checkState(input.getType() == CheckpointType.ROCKS_SINGLE @@ -244,7 +372,7 @@ public synchronized void restoreFromCheckpoint(CheckpointInputStream input) thro if (input.getType() == CheckpointType.ROCKS_PARALLEL) { List tmpDirs = Configuration.getList(PropertyKey.TMP_DIRS); String tmpZipFilePath = new File(tmpDirs.get(0), "alluxioRockStore-" + UUID.randomUUID()) - .getPath(); + .getPath(); try { try (FileOutputStream fos = new FileOutputStream(tmpZipFilePath)) { @@ -252,7 +380,7 @@ public synchronized void restoreFromCheckpoint(CheckpointInputStream input) thro } ParallelZipUtils.decompress(Paths.get(mDbPath), tmpZipFilePath, - mParallelBackupPoolSize); + mParallelBackupPoolSize); FileUtils.deletePathRecursively(tmpZipFilePath); } catch (Exception e) { @@ -269,11 +397,14 @@ public synchronized void restoreFromCheckpoint(CheckpointInputStream input) thro throw new IOException(e); } LOG.info("Restored rocksdb checkpoint in {}ms", - (System.nanoTime() - startNano) / Constants.MS_NANO); + (System.nanoTime() - startNano) / Constants.MS_NANO); } @Override - public synchronized void close() { + /** + * Requires the caller to acquire exclusive lock by calling {@link #lockForClosing()}. + */ + public void close() { stopDb(); LOG.info("Closed store at {}", mDbPath); } @@ -344,4 +475,283 @@ private static IndexType toRocksIndexType( throw new IllegalArgumentException(String.format("Unknown IndexType %s", index)); } } + + /** + * This is the core logic of the shared lock mechanism. + * + * Before any r/w operation on the RocksDB, acquire a shared lock with this method. + * The shared lock guarantees the RocksDB will not be restarted/cleared during the + * r/w access. In other words, similar to a read-write lock, exclusive lock requests + * will wait for shared locks to be released first. + * + * However, note that exclusive lock acquisition only waits for a certain period of time, + * defined by {@link PropertyKey#MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT}. + * After this timeout, the exclusive lock will be forced, and the shared lock holders + * are disrespected. Normally, the r/w operation should either complete or abort within + * seconds so the timeout {@link PropertyKey#MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT} + * should not be exceeded at all. + * + * @return a shared lock handle used to manage and close the shared lock + */ + public RocksSharedLockHandle checkAndAcquireSharedLock() { + if (mRocksDbStopServing.getReference()) { + throw new UnavailableRuntimeException(ExceptionMessage.ROCKS_DB_CLOSING.getMessage()); + } + /* + * The lock action is merely incrementing the lock so it is very fast + * The closer will respect the ref count and only close when the ref count is zero + */ + mRefCount.increment(); + + /* + * Need to check the flag again to PREVENT the sequence of events below: + * 1. Reader checks flag + * 2. Closer sets flag + * 3. Closer sees refCount=0 + * 4. Reader increments refCount + * 5. Closer closes RocksDB + * 6. Reader reads RocksDB and incurs a segfault + * + * With the 2nd check, we make sure the ref count will be respected by the closer and + * the closer will therefore wait for this reader to complete/abort. 
+ */ + if (mRocksDbStopServing.getReference()) { + mRefCount.decrement(); + throw new UnavailableRuntimeException(ExceptionMessage.ROCKS_DB_CLOSING.getMessage()); + } + + return new RocksSharedLockHandle(mRocksDbStopServing.getStamp(), mRefCount); + } + + /** + * This is the core logic of the exclusive lock mechanism. + * + * The exclusive lock will first set a flag and then wait for all shared lock holders to + * complete/abort. The time to wait is defined by + * {@link PropertyKey#MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT}. + * When the r/w operations observe this flag via {@link #shouldAbort(int)}, + * the operation will be aborted and the shared lock will be released. + * Some short operations do not check {@link #shouldAbort(int)} because we expect + * them to finish fast. + * + * Normally, the default value of this timeout is long enough. + * However, if the ref count is still not zero after this wait, the exclusive lock will + * be forced and some warnings will be logged. There are multiple possibilities: + * 1. There is a very slow r/w operation. + * 2. Some r/w operations somewhere are not following the rules. + * 3. There is a bug somewhere, and the ref count is incorrect. + * In any of these cases, submit an issue to https://github.com/Alluxio/alluxio/issues. + * We do not recommend tuning + * {@link PropertyKey#MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT}, + * because that usually just covers up the real issue. + * + * There are 4 cases where the exclusive lock is acquired: + * 1. The master is closing (and the process will exit). + * 2. The RocksDB will be cleared. This happens when the master process starts or in a failover. + * 3. The master is just dumping a checkpoint, where the RocksDB contents will not change. + * 4. The master is restoring from a checkpoint/backup where the RocksDB is rebuilt. + * + * When the master is closing, it will not wait for an ongoing checkpoint/restore/clear + * operation and will just grab the lock even though the exclusive lock is taken. + * Then the master process will exit and whatever operation is ongoing will be aborted. + * This covers case 1, where yieldToAnotherCloser=false. + * + * In case 2, 3 or 4, we let the later closer (writer) fail. It will be the caller's + * responsibility to either retry or abort. In other words, when yieldToAnotherCloser=true, + * the one that sets the flag will succeed and the other one will fail. + * + * @param yieldToAnotherCloser if true, the operation will fail if it observes a concurrent + * action on the exclusive lock + */ + private void setFlagAndBlockingWait(boolean yieldToAnotherCloser) { + // Another known operation has acquired the exclusive lock + if (yieldToAnotherCloser && mRocksDbStopServing.getReference()) { + throw new UnavailableRuntimeException(ExceptionMessage.ROCKS_DB_CLOSING.getMessage()); + } + + int version = mRocksDbStopServing.getStamp(); + if (yieldToAnotherCloser) { + if (!mRocksDbStopServing.compareAndSet(false, true, version, version)) { + throw new UnavailableRuntimeException(ExceptionMessage.ROCKS_DB_CLOSING.getMessage()); + } + } else { + // Just set the state with no respect to concurrent actions + mRocksDbStopServing.set(true, version); + } + + /* + * Wait until: + * 1. Ref count is zero, meaning all concurrent r/w have completed or aborted + * 2.
Timeout is reached, meaning we force close/restart without waiting + * + * According to the Java doc + * https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/atomic/LongAdder.html + * in the absence of concurrent updates, sum() returns an accurate result. + * But sum() does not see concurrent updates and therefore can miss an update. + * + * The correctness then relies on the 2nd check in checkAndAcquireSharedLock(), + * because the reader will see the flag and just abort voluntarily. An example sequence + * of events is as follows: + * 1. Reader checks flag, the flag is not set by the closer + * 2. Closer sets flag + * 3. Closer sees refCount=0 + * 4. Reader increments refCount + * 5. Closer closes RocksDB + * 6. Reader checks flag again and sees the flag + * 7. Reader decrements refCount and aborts in checkAndAcquireSharedLock() + */ + Instant waitStart = Instant.now(); + CountingRetry retry = new CountingRetry((int) ROCKS_CLOSE_WAIT_TIMEOUT.getSeconds() * 10); + while (mRefCount.sum() != 0 && retry.attempt()) { + SleepUtils.sleepMs(100); + } + Duration elapsed = Duration.between(waitStart, Instant.now()); + LOG.info("Waited {}ms for ongoing read/write to complete/abort", elapsed.toMillis()); + + /* + * Reset the ref count to forget about the aborted operations + */ + long unclosedOperations = mRefCount.sum(); + if (unclosedOperations != 0) { + if (Configuration.getBoolean(PropertyKey.TEST_MODE)) { + throw new RuntimeException(ExceptionMessage.ROCKS_DB_EXCLUSIVE_LOCK_FORCED + .getMessage(unclosedOperations)); + } + /* + * Set the flag so shared locks know that the ref count has been reset, + * no need to update the ref count on unlock. + * If one shared lock did not decrement the ref count before this reset, it should not + * decrement the ref count when it is released. + */ + resetRefCounter(); + LOG.warn("{} readers/writers failed to complete/abort before we stop/restart the RocksDB", + unclosedOperations); + } + } + + /** + * When the exclusive lock is forced (after a timeout), we have to reset the ref count to zero + * and throw away the updates from the concurrent readers. In other words, those readers should + * not update the ref count when they release the lock. One possible sequence of events + * goes as follows: + * + * 1. Reader checks the flag. + * 2. Reader increments refCount. + * 3. Reader is blocked (for a lock) or goes to sleep. + * 4. One Closer comes in, sets the flag and waits on refCount. + * 5. The Closer's wait times out. The Closer forces the exclusive lock and resets refCount to 0. + * 6. Instead of closing the RocksDB, the exclusive lock is taken for restoring the RocksDB. + * 7. Closer finishes and resets the flag to 0. + * 8. Reader wakes up and releases the shared lock; now it should NOT decrement the ref count. + * + * We create a new ref counter and throw away the existing one, so the old readers will + * update the old counter when they release the lock, and only the new counter will be used. + */ + private void resetRefCounter() { + mRefCount = new LongAdder(); + } + + /** + * Acquires an exclusive lock on the RocksDB before the process shuts down. + * Note this lock only exists on the Alluxio side. A STOP_SERVING flag will be set so all + * existing readers/writers will abort asap. + * The exclusive lock ensures there are no existing concurrent r/w operations, so it is safe to + * close the RocksDB and recycle all relevant resources. + * + * The STOP_SERVING status will NOT be reset, because the process will shut down soon.
+ * + * @return the exclusive lock handle used to manage and close the lock + */ + public RocksExclusiveLockHandle lockForClosing() { + // Grab the lock with no respect to concurrent operations + // Just grab the lock and close + setFlagAndBlockingWait(false); + return new RocksExclusiveLockHandle(mCheckRefCount); + } + + /** + * Acquires an exclusive lock on the RocksDB before dumping a checkpoint. + * Note this lock only exists on the Alluxio side. A STOP_SERVING flag will be set so all + * existing readers/writers will abort asap. + * The exclusive lock ensures there are no existing concurrent r/w operations, so it is safe to + * restart/checkpoint the RocksDB and update the DB reference. + * + * The STOP_SERVING status will be reset and the RocksDB will be open for operations again. + * The version will not be bumped up, because the RocksDB contents have not changed. + * See {@link #checkAndAcquireSharedLock} for how this affects the shared lock logic. + * + * @return the exclusive lock handle used to manage and close the lock + */ + public RocksExclusiveLockHandle lockForCheckpoint() { + // Grab the lock with respect to contenders + setFlagAndBlockingWait(true); + return new RocksExclusiveLockHandle(() -> { + mCheckRefCount.call(); + // There is no need to worry about overwriting another concurrent Closer action + // The only chance of concurrency is with lockForClosing() + // But lockForClosing() guarantees the master process will close immediately + mRocksDbStopServing.set(false, mRocksDbStopServing.getStamp()); + return null; + }); + } + + /** + * Acquires an exclusive lock on the RocksDB before rewriting (clearing or restoring) it. + * Note this lock only exists on the Alluxio side. A STOP_SERVING flag will be set so all + * existing readers/writers will abort asap. + * The exclusive lock ensures there are no existing concurrent r/w operations, so it is safe to + * restart/checkpoint the RocksDB and update the DB reference. + * + * The STOP_SERVING status will be reset and the RocksDB will be open for operations again. + * The version will be bumped up, because the RocksDB contents have changed. If one slow + * operation expects to see the old version, that operation should abort. + * See {@link #checkAndAcquireSharedLock} for how this affects the shared lock logic. + * + * @return the exclusive lock handle used to manage and close the lock + */ + public RocksExclusiveLockHandle lockForRewrite() { + // Grab the lock with respect to contenders + setFlagAndBlockingWait(true); + return new RocksExclusiveLockHandle(() -> { + mCheckRefCount.call(); + // There is no need to worry about overwriting another concurrent Closer action + // The only chance of concurrency is with lockForClosing() + // But lockForClosing() guarantees the master process will close immediately + mRocksDbStopServing.set(false, mRocksDbStopServing.getStamp() + 1); + return null; + }); + } + + /** + * Used by ongoing r/w operations to check if the operation needs to abort and yield + * to the RocksDB shutdown. + * + * @param lockedVersion the RocksDB version from the shared lock. This version is used to tell + * if a restore or clear operation has happened on the RocksDB.
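Combining the shared lock with shouldAbort gives long-running operations a cooperative cancellation point, mirroring the iteration pattern used in RocksInodeStore earlier in this patch. A hypothetical sketch:

```java
import org.rocksdb.RocksIterator;

/** Hypothetical long-running scan that yields to closers. */
final class CooperativeScan {
  static long scan(RocksStore store, RocksIterator iter) {
    long visited = 0;
    try (RocksSharedLockHandle lock = store.checkAndAcquireSharedLock()) {
      for (iter.seekToFirst(); iter.isValid(); iter.next()) {
        // Aborts (by throwing) if the DB is stopping or was rewritten since we locked.
        store.shouldAbort(lock.getLockVersion());
        visited++; // ... process iter.key() / iter.value() here ...
      }
    }
    return visited;
  }
}
```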
+ */ + public void shouldAbort(int lockedVersion) { + if (mRocksDbStopServing.getReference()) { + throw new UnavailableRuntimeException(ExceptionMessage.ROCKS_DB_CLOSING.getMessage()); + } else if (lockedVersion < mRocksDbStopServing.getStamp()) { + throw new UnavailableRuntimeException(ExceptionMessage.ROCKS_DB_REWRITTEN.getMessage()); + } + } + + /** + * Checks whether the RocksDB is marked for exclusive access, so the operation should abort. + * @return whether the RocksDB expects to stop + */ + public boolean isServiceStopping() { + return mRocksDbStopServing.getReference(); + } + + /** + * Gets the number of shared lock on the RocksStore. + * + * @return the count + */ + @VisibleForTesting + public long getSharedLockCount() { + return mRefCount.sum(); + } } diff --git a/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java b/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java index c21186140b8c..3ccbb8c7aba1 100644 --- a/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java +++ b/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java @@ -18,6 +18,7 @@ import alluxio.grpc.GrpcService; import alluxio.grpc.MetricValue; import alluxio.grpc.ServiceType; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatExecutor; import alluxio.heartbeat.HeartbeatThread; @@ -30,11 +31,13 @@ import alluxio.metrics.MetricsSystem; import alluxio.metrics.MultiValueMetricsAggregator; import alluxio.metrics.aggregator.SingleTagValueAggregator; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.util.executor.ExecutorServiceFactories; import alluxio.util.executor.ExecutorServiceFactory; import com.codahale.metrics.Gauge; import com.google.common.annotations.VisibleForTesting; +import io.grpc.ServerInterceptors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -164,7 +167,9 @@ public String getName() { public Map getServices() { Map services = new HashMap<>(); services.put(ServiceType.METRICS_MASTER_CLIENT_SERVICE, - new GrpcService(getMasterServiceHandler())); + new GrpcService(ServerInterceptors.intercept( + getMasterServiceHandler(), + new ClientContextServerInjector()))); return services; } @@ -176,7 +181,8 @@ public void start(Boolean isLeader) throws IOException { if (isLeader) { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_CLUSTER_METRICS_UPDATER, new ClusterMetricsUpdater(), - Configuration.getMs(PropertyKey.MASTER_CLUSTER_METRICS_UPDATE_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_CLUSTER_METRICS_UPDATE_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); } } @@ -211,7 +217,7 @@ public Map getMetrics() { */ private class ClusterMetricsUpdater implements HeartbeatExecutor { @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { updateMultiValueMasterMetrics(); } diff --git a/core/server/master/src/main/java/alluxio/master/metrics/MetricsStore.java b/core/server/master/src/main/java/alluxio/master/metrics/MetricsStore.java index cca38f792671..9ded436a8af0 100644 --- a/core/server/master/src/main/java/alluxio/master/metrics/MetricsStore.java +++ b/core/server/master/src/main/java/alluxio/master/metrics/MetricsStore.java @@ -185,6 +185,9 @@ public void initMetricKeys() { mClusterCounters.putIfAbsent(new 
ClusterCounterKey(InstanceType.WORKER, MetricKey.WORKER_BYTES_READ_DOMAIN.getMetricName()), MetricsSystem.counter(MetricKey.CLUSTER_BYTES_READ_DOMAIN.getName())); + mClusterCounters.putIfAbsent(new ClusterCounterKey(InstanceType.WORKER, + MetricKey.WORKER_BYTES_READ_CACHE.getMetricName()), + MetricsSystem.counter(MetricKey.CLUSTER_BYTES_READ_CACHE.getName())); mClusterCounters.putIfAbsent(new ClusterCounterKey(InstanceType.WORKER, MetricKey.WORKER_BYTES_WRITTEN_REMOTE.getMetricName()), MetricsSystem.counter(MetricKey.CLUSTER_BYTES_WRITTEN_REMOTE.getName())); diff --git a/core/server/master/src/main/java/alluxio/master/scheduler/DefaultWorkerProvider.java b/core/server/master/src/main/java/alluxio/master/scheduler/DefaultWorkerProvider.java new file mode 100644 index 000000000000..3d7f623999ab --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/scheduler/DefaultWorkerProvider.java @@ -0,0 +1,65 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.scheduler; + +import alluxio.client.block.stream.BlockWorkerClient; +import alluxio.client.file.FileSystemContext; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.exception.runtime.UnavailableRuntimeException; +import alluxio.exception.status.UnavailableException; +import alluxio.master.file.FileSystemMaster; +import alluxio.resource.CloseableResource; +import alluxio.scheduler.job.WorkerProvider; +import alluxio.wire.WorkerInfo; +import alluxio.wire.WorkerNetAddress; + +import java.io.IOException; +import java.util.List; + +/** + * Default worker provider that gets worker information from the Alluxio master. + */ +public class DefaultWorkerProvider implements WorkerProvider { + private final FileSystemMaster mFileSystemMaster; + private final FileSystemContext mContext; + + /** + * Creates a new instance of {@link DefaultWorkerProvider}.
+ * + * @param fileSystemMaster the file system master + * @param context the file system context + */ + public DefaultWorkerProvider(FileSystemMaster fileSystemMaster, FileSystemContext context) { + mFileSystemMaster = fileSystemMaster; + mContext = context; + } + + @Override + public List getWorkerInfos() { + try { + // TODO(jianjian): need api for healthy worker instead + return mFileSystemMaster.getWorkerInfoList(); + } catch (UnavailableException e) { + throw new UnavailableRuntimeException( + "fail to get worker infos because master is not available", e); + } + } + + @Override + public CloseableResource getWorkerClient(WorkerNetAddress address) { + try { + return mContext.acquireBlockWorkerClient(address); + } catch (IOException e) { + throw AlluxioRuntimeException.from(e); + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/scheduler/JournaledJobMetaStore.java b/core/server/master/src/main/java/alluxio/master/scheduler/JournaledJobMetaStore.java new file mode 100644 index 000000000000..1b262fb0449f --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/scheduler/JournaledJobMetaStore.java @@ -0,0 +1,89 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.scheduler; + +import alluxio.collections.ConcurrentHashSet; +import alluxio.exception.runtime.UnavailableRuntimeException; +import alluxio.exception.status.UnavailableException; +import alluxio.master.file.FileSystemMaster; +import alluxio.master.job.JobFactoryProducer; +import alluxio.master.journal.JournalContext; +import alluxio.master.journal.Journaled; +import alluxio.master.journal.checkpoint.CheckpointName; +import alluxio.proto.journal.Journal; +import alluxio.resource.CloseableIterator; +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobMetaStore; + +import com.google.common.collect.Iterators; + +import java.util.Set; + +/** + * A journaled job meta store. + */ +public class JournaledJobMetaStore implements JobMetaStore, Journaled { + private final FileSystemMaster mFileSystemMaster; + private final Set> + mExistingJobs = new ConcurrentHashSet<>(); + + /** + * Creates a new instance of {@link JournaledJobMetaStore}. 
+ * @param fileSystemMaster the file system master + */ + public JournaledJobMetaStore(FileSystemMaster fileSystemMaster) { + mFileSystemMaster = fileSystemMaster; + } + + @Override + public CloseableIterator getJournalEntryIterator() { + return CloseableIterator.noopCloseable( + Iterators.transform(mExistingJobs.iterator(), Job::toJournalEntry)); + } + + @Override + public boolean processJournalEntry(Journal.JournalEntry entry) { + if (!entry.hasLoadJob()) { + return false; + } + Job job = JobFactoryProducer + .create(entry, mFileSystemMaster).create(); + mExistingJobs.add(job); + return true; + } + + @Override + public void resetState() { + mExistingJobs.clear(); + } + + @Override + public CheckpointName getCheckpointName() { + return CheckpointName.SCHEDULER; + } + + @Override + public void updateJob(Job job) { + try (JournalContext context = mFileSystemMaster.createJournalContext()) { + context.append(job.toJournalEntry()); + mExistingJobs.add(job); + } catch (UnavailableException e) { + throw new UnavailableRuntimeException( + "There is an ongoing backup running, please submit later", e); + } + } + + @Override + public Set> getJobs() { + return mExistingJobs; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java b/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java new file mode 100644 index 000000000000..ab230e7715ce --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java @@ -0,0 +1,402 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.scheduler; + +import static java.lang.String.format; + +import alluxio.Constants; +import alluxio.client.block.stream.BlockWorkerClient; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.exception.runtime.ResourceExhaustedRuntimeException; +import alluxio.exception.runtime.UnavailableRuntimeException; +import alluxio.grpc.JobProgressReportFormat; +import alluxio.job.JobDescription; +import alluxio.resource.CloseableResource; +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobMetaStore; +import alluxio.scheduler.job.JobState; +import alluxio.scheduler.job.Task; +import alluxio.scheduler.job.WorkerProvider; +import alluxio.util.ThreadFactoryUtils; +import alluxio.util.ThreadUtils; +import alluxio.wire.WorkerInfo; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashSet; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import javax.annotation.concurrent.ThreadSafe; + +/** + * The Scheduler which controls jobs. It is responsible for managing active workers, updating jobs, + * and persisting job information to the job meta store. + * The workflow is: + * 1. Submit a job to the scheduler. + * 2. The scheduler will pull the task from the job and assign the task to a worker. + * 3. The worker will execute the task and report the result to the job. + * 4. The job will update the progress and schedule the next task if the job is not done. + * 5. One worker has at most one task running for one job description at a time. + */ +@ThreadSafe +public final class Scheduler { + + private static final Logger LOG = LoggerFactory.getLogger(Scheduler.class); + private static final int CAPACITY = 100; + private static final long WORKER_UPDATE_INTERVAL = Configuration.getMs( + PropertyKey.MASTER_WORKER_INFO_CACHE_REFRESH_TIME); + private static final int EXECUTOR_SHUTDOWN_MS = 10 * Constants.SECOND_MS; + private final Map<JobDescription, Job<?>> + mExistingJobs = new ConcurrentHashMap<>(); + private final Map<Job<?>, Set<WorkerInfo>> mRunningTasks = new ConcurrentHashMap<>(); + private final JobMetaStore mJobMetaStore; + // initialized in the start method since we stop and restart the thread when gaining primacy + private ScheduledExecutorService mSchedulerExecutor; + private volatile boolean mRunning = false; + private Map<WorkerInfo, CloseableResource<BlockWorkerClient>> mActiveWorkers = ImmutableMap.of(); + private final WorkerProvider mWorkerProvider; + + /** + * Creates a new instance of {@link Scheduler}. + * + * @param workerProvider interface for providing worker information and client + * @param jobMetaStore job meta store that stores job information + */ + public Scheduler(WorkerProvider workerProvider, JobMetaStore jobMetaStore) { + mWorkerProvider = workerProvider; + mJobMetaStore = jobMetaStore; + } + + /** + * Start scheduler.
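+ * + * <p>A sketch of the intended lifecycle (hypothetical caller code using only methods of this class): + * <pre>{@code + * Scheduler scheduler = new Scheduler(workerProvider, jobMetaStore); + * scheduler.start(); // schedules updateWorkers, processJobs and cleanupStaleJob + * scheduler.submitJob(job); + * // ... later ... + * scheduler.stop(); // closes worker clients and shuts down the executor + * }</pre>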
+ */ + public void start() { + if (!mRunning) { + retrieveJobs(); + mSchedulerExecutor = Executors.newSingleThreadScheduledExecutor( + ThreadFactoryUtils.build("scheduler", false)); + mSchedulerExecutor.scheduleAtFixedRate(this::updateWorkers, 0, WORKER_UPDATE_INTERVAL, + TimeUnit.MILLISECONDS); + mSchedulerExecutor.scheduleWithFixedDelay(this::processJobs, 0, 100, TimeUnit.MILLISECONDS); + mSchedulerExecutor.scheduleWithFixedDelay(this::cleanupStaleJob, 1, 1, TimeUnit.HOURS); + mRunning = true; + } + } + + private void retrieveJobs() { + for (Job<?> job : mJobMetaStore.getJobs()) { + mExistingJobs.put(job.getDescription(), job); + if (job.isDone()) { + mRunningTasks.remove(job); + } + else { + mRunningTasks.put(job, new HashSet<>()); + } + } + } + + /** + * Stop scheduler. + */ + public void stop() { + if (mRunning) { + mActiveWorkers.values().forEach(CloseableResource::close); + mActiveWorkers = ImmutableMap.of(); + ThreadUtils.shutdownAndAwaitTermination(mSchedulerExecutor, EXECUTOR_SHUTDOWN_MS); + mRunning = false; + } + } + + /** + * Submit a job. + * @param job the job + * @return true if the job is new, false if the job has already been submitted + * @throws ResourceExhaustedRuntimeException if the job cannot be submitted because the scheduler + * is at capacity + * @throws UnavailableRuntimeException if the job cannot be submitted because the meta store is + * not ready + */ + public boolean submitJob(Job<?> job) { + Job<?> existingJob = mExistingJobs.get(job.getDescription()); + if (existingJob != null && !existingJob.isDone()) { + updateExistingJob(job, existingJob); + return false; + } + + if (mRunningTasks.size() >= CAPACITY) { + throw new ResourceExhaustedRuntimeException( + "Too many jobs running, please submit later.", true); + } + mJobMetaStore.updateJob(job); + mExistingJobs.put(job.getDescription(), job); + mRunningTasks.put(job, new HashSet<>()); + LOG.debug(format("start job: %s", job)); + return true; + } + + private void updateExistingJob(Job<?> newJob, Job<?> existingJob) { + existingJob.updateJob(newJob); + mJobMetaStore.updateJob(existingJob); + LOG.debug(format("updated existing job: %s from %s", existingJob, newJob)); + if (existingJob.getJobState() == JobState.STOPPED) { + existingJob.setJobState(JobState.RUNNING); + mRunningTasks.put(existingJob, new HashSet<>()); + LOG.debug(format("restart existing job: %s", existingJob)); + } + } + + /** + * Stop a job. + * @param jobDescription job identifier + * @return true if the job is stopped, false if the job does not exist or has already finished + */ + public boolean stopJob(JobDescription jobDescription) { + Job<?> existingJob = mExistingJobs.get(jobDescription); + if (existingJob != null && existingJob.isRunning()) { + existingJob.setJobState(JobState.STOPPED); + mJobMetaStore.updateJob(existingJob); + // leftover tasks in mRunningTasks will be removed by the scheduling thread. + return true; + } + return false; + } + + /** + * Get the job's progress report.
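+ * + * <p>For example (hypothetical values): + * <pre>{@code + * String report = scheduler.getJobProgress(description, JobProgressReportFormat.TEXT, true); + * }</pre>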
+ * @param jobDescription job identifier + * @param format progress report format + * @param verbose whether to include details on failed files and failures + * @return the progress report + * @throws NotFoundRuntimeException if the job does not exist + * @throws AlluxioRuntimeException if any other Alluxio exception occurs + */ + public String getJobProgress( + JobDescription jobDescription, + JobProgressReportFormat format, + boolean verbose) { + Job<?> job = mExistingJobs.get(jobDescription); + if (job == null) { + throw new NotFoundRuntimeException(format("%s cannot be found.", jobDescription)); + } + return job.getProgress(format, verbose); + } + + /** + * Get active workers. + * @return active workers + */ + @VisibleForTesting + public Map<WorkerInfo, CloseableResource<BlockWorkerClient>> getActiveWorkers() { + return mActiveWorkers; + } + + /** + * Removes all finished jobs outside the retention time. + */ + @VisibleForTesting + public void cleanupStaleJob() { + long current = System.currentTimeMillis(); + mExistingJobs + .entrySet().removeIf(job -> !job.getValue().isRunning() + && job.getValue().getEndTime().isPresent() + && job.getValue().getEndTime().getAsLong() <= (current - Configuration.getMs( + PropertyKey.JOB_RETENTION_TIME))); + } + + /** + * Refresh active workers. + */ + @VisibleForTesting + public void updateWorkers() { + if (Thread.currentThread().isInterrupted()) { + return; + } + Set<WorkerInfo> workerInfos; + try { + try { + workerInfos = ImmutableSet.copyOf(mWorkerProvider.getWorkerInfos()); + } catch (AlluxioRuntimeException e) { + LOG.warn("Failed to get worker info, using existing worker infos of {} workers", + mActiveWorkers.size()); + return; + } + if (workerInfos.size() == mActiveWorkers.size() + && workerInfos.containsAll(mActiveWorkers.keySet())) { + return; + } + + ImmutableMap.Builder<WorkerInfo, CloseableResource<BlockWorkerClient>> updatedWorkers = + ImmutableMap.builder(); + for (WorkerInfo workerInfo : workerInfos) { + if (mActiveWorkers.containsKey(workerInfo)) { + updatedWorkers.put(workerInfo, mActiveWorkers.get(workerInfo)); + } + else { + try { + updatedWorkers.put(workerInfo, + mWorkerProvider.getWorkerClient(workerInfo.getAddress())); + } catch (AlluxioRuntimeException e) { + // skip the worker if we cannot obtain a client + } + } + } + // Close clients connecting to lost workers + for (Map.Entry<WorkerInfo, CloseableResource<BlockWorkerClient>> entry : + mActiveWorkers.entrySet()) { + WorkerInfo workerInfo = entry.getKey(); + if (!workerInfos.contains(workerInfo)) { + CloseableResource<BlockWorkerClient> resource = entry.getValue(); + resource.close(); + LOG.debug("Closed BlockWorkerClient to lost worker {}", workerInfo); + } + } + // Build the clients to the current active worker list + mActiveWorkers = updatedWorkers.build(); + } catch (Exception e) { + // Unknown exception. This should not happen, but if it happens we don't want to lose the + // scheduler thread, thus catching it here. Any exception surfaced here should be properly + // handled. + LOG.error("Unexpected exception thrown in updateWorkers.", e); + } + } + + /** + * Get jobs. + * + * @return jobs + */ + @VisibleForTesting + public Map<JobDescription, Job<?>> getJobs() { + return mExistingJobs; + } + + private void processJobs() { + if (Thread.currentThread().isInterrupted()) { + return; + } + mRunningTasks.forEach(this::processJob); + } + + private void processJob(Job<?> job, Set<WorkerInfo> runningWorkers) { + try { + if (!job.isRunning()) { + try { + mJobMetaStore.updateJob(job); + } + catch (UnavailableRuntimeException e) { + // This should not happen because the scheduler should not be started while master is + // still processing journal entries.
However, if it does happen, we don't want to throw + // an exception in a task running on the scheduler thread. So we just log it and rely on a + // later retry to succeed. + LOG.error("error writing to journal when processing job", e); + } + mRunningTasks.remove(job); + return; + } + if (!job.isHealthy()) { + job.failJob(new InternalRuntimeException("Job failed because it's not healthy.")); + return; + } + + // If there are new workers, schedule job onto new workers + mActiveWorkers.forEach((workerInfo, workerClient) -> { + if (!runningWorkers.contains(workerInfo) && scheduleTask(job, workerInfo, runningWorkers, + workerClient)) { + runningWorkers.add(workerInfo); + } + }); + + if (runningWorkers.isEmpty() && job.isCurrentPassDone()) { + if (job.needVerification()) { + job.initiateVerification(); + } + else { + if (job.isHealthy()) { + job.setJobSuccess(); + } + else { + job.failJob(new InternalRuntimeException("Job failed because it's not healthy.")); + } + } + } + } catch (Exception e) { + // Unknown exception. This should not happen, but if it happens we don't want to lose the + // scheduler thread, thus catching it here. Any exception surfaced here should be properly + // handled. + LOG.error("Unexpected exception thrown in processJob.", e); + job.failJob(new InternalRuntimeException(e)); + } + } + + private boolean scheduleTask( + @SuppressWarnings("rawtypes") Job job, + WorkerInfo workerInfo, + Set<WorkerInfo> livingWorkers, + CloseableResource<BlockWorkerClient> workerClient) { + if (!job.isRunning()) { + return false; + } + Optional<Task<?>> task; + try { + task = job.getNextTask(workerInfo); + } catch (AlluxioRuntimeException e) { + LOG.warn(format("error getting next task for job %s", job), e); + if (!e.isRetryable()) { + job.failJob(e); + } + return false; + } + if (!task.isPresent()) { + return false; + } + Task<?> currentTask = task.get(); + currentTask.execute(workerClient.get()); + currentTask.getResponseFuture().addListener(() -> { + try { + if (!job.processResponse(currentTask)) { + livingWorkers.remove(workerInfo); + } + // Schedule next batch for healthy job + if (job.isHealthy()) { + if (mActiveWorkers.containsKey(workerInfo)) { + if (!scheduleTask(job, workerInfo, livingWorkers, mActiveWorkers.get(workerInfo))) { + livingWorkers.remove(workerInfo); + } + } + else { + livingWorkers.remove(workerInfo); + } + } + } catch (Exception e) { + // Unknown exception. This should not happen, but if it happens we don't want to lose the + // scheduler thread, thus catching it here. Any exception surfaced here should be properly + // handled.
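+ // Descriptive note: failing the job below surfaces the unexpected error through the job's + // state (visible via getJobProgress) instead of tearing down the scheduler executor.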
+ LOG.error("Unexpected exception thrown in response future listener.", e); + job.failJob(new InternalRuntimeException(e)); + } + }, mSchedulerExecutor); + return true; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java index 0d750c22cd85..f51f2089d72b 100644 --- a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java +++ b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java @@ -17,7 +17,10 @@ import alluxio.grpc.ErrorType; import alluxio.grpc.GrpcServer; import alluxio.grpc.GrpcServerBuilder; +import alluxio.grpc.GrpcService; +import alluxio.grpc.ServiceType; import alluxio.master.AlluxioExecutorService; +import alluxio.master.Master; import alluxio.master.MasterProcess; import alluxio.master.MasterRegistry; import alluxio.master.SafeModeManager; @@ -34,8 +37,10 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.net.Socket; +import java.util.Map; import java.util.Optional; import java.util.concurrent.TimeUnit; +import java.util.function.Function; import javax.annotation.Nullable; import javax.annotation.concurrent.GuardedBy; @@ -70,11 +75,29 @@ protected RpcServerService(InetSocketAddress bindAddress, MasterProcess masterPr mMasterProcess = masterProcess; } + protected final synchronized boolean isGrpcServerServing() { + return mGrpcServer != null && mGrpcServer.isServing(); + } + /** * @return whether the grpc server is serving or not */ public synchronized boolean isServing() { - return mGrpcServer != null && mGrpcServer.isServing(); + return isServingLeader() || isServingStandby(); + } + + /** + * @return whether the grpc server is serving in leader mode + */ + public synchronized boolean isServingLeader() { + return isGrpcServerServing(); + } + + /** + * @return whether the grpc server is serving in standby mode + */ + public synchronized boolean isServingStandby() { + return false; } @Override @@ -89,6 +112,11 @@ public synchronized void promote() { Preconditions.checkState(mGrpcServer == null, "rpc server must not be running"); stopRejectingServer(); waitForFree(); + startGrpcServer(Master::getServices); + } + + protected synchronized void startGrpcServer( + Function> serviceProvider) { GrpcServerBuilder builder = mMasterProcess.createBaseRpcServer(); Optional executorService = mMasterProcess.createRpcExecutorService(); if (executorService.isPresent()) { @@ -96,12 +124,12 @@ public synchronized void promote() { mRpcExecutor = executorService.get(); } mMasterRegistry.getServers().forEach(master -> { - master.getServices().forEach((type, service) -> { + serviceProvider.apply(master).forEach((type, service) -> { builder.addService(type, service); LOG.info("registered service {}", type.name()); }); }); - mGrpcServer = builder.build(); + mGrpcServer = builder.build(() -> mMasterProcess.getPrimarySelector().getStateUnsafe()); try { mGrpcServer.start(); mMasterProcess.getSafeModeManager().ifPresent(SafeModeManager::notifyRpcServerStarted); @@ -209,6 +237,9 @@ public static RpcServerService create( InetSocketAddress bindAddress, MasterProcess masterProcess, MasterRegistry masterRegistry) { + if (Configuration.getBoolean(PropertyKey.STANDBY_MASTER_GRPC_ENABLED)) { + return new RpcServerStandbyGrpcService(bindAddress, masterProcess, masterRegistry); + } return new RpcServerService(bindAddress, masterProcess, masterRegistry); } } diff --git 
a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerStandbyGrpcService.java b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerStandbyGrpcService.java new file mode 100644 index 000000000000..f25743f24851 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerStandbyGrpcService.java @@ -0,0 +1,88 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.service.rpc; + +import alluxio.master.Master; +import alluxio.master.MasterProcess; +import alluxio.master.MasterRegistry; + +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.InetSocketAddress; + +/** + * Created by {@link RpcServerService.Factory}. + * Manages the behavior of the master's rpc service. The grpc server is always on. + * When the promotion/demotion happens, the rpc service will be stopped and restarted. + * The newly started grpc service will serve gRPC endpoints based on the node state (PRIMARY/STANDBY). + * No rejecting server is deployed. + */ +public class RpcServerStandbyGrpcService extends RpcServerService { + protected static final Logger LOG = LoggerFactory.getLogger(RpcServerStandbyGrpcService.class); + + private boolean mIsPromoted = false; + + protected RpcServerStandbyGrpcService( + InetSocketAddress bindAddress, + MasterProcess masterProcess, + MasterRegistry masterRegistry + ) { + super(bindAddress, masterProcess, masterRegistry); + } + + @Override + public synchronized boolean isServingLeader() { + return mIsPromoted && isGrpcServerServing(); + } + + @Override + public synchronized boolean isServingStandby() { + return !mIsPromoted && isGrpcServerServing(); + } + + @Override + public synchronized void start() { + LOG.info("Starting {}", this.getClass().getSimpleName()); + startGrpcServer(Master::getStandbyServices); + } + + @Override + public synchronized void stop() { + stopGrpcServer(); + stopRpcExecutor(); + mIsPromoted = false; + } + + @Override + public synchronized void promote() { + Preconditions.checkState(!mIsPromoted, "double promotion is not allowed"); + LOG.info("Promoting {}", this.getClass().getSimpleName()); + stopGrpcServer(); + stopRpcExecutor(); + waitForFree(); + startGrpcServer(Master::getServices); + mIsPromoted = true; + } + + @Override + public synchronized void demote() { + Preconditions.checkState(mIsPromoted, "double demotion is not allowed"); + LOG.info("Demoting {}", this.getClass().getSimpleName()); + stopGrpcServer(); + stopRpcExecutor(); + waitForFree(); + startGrpcServer(Master::getStandbyServices); + mIsPromoted = false; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/throttle/DefaultThrottleMaster.java b/core/server/master/src/main/java/alluxio/master/throttle/DefaultThrottleMaster.java index 0c7385373f4a..ef5eee6f489c 100644 --- a/core/server/master/src/main/java/alluxio/master/throttle/DefaultThrottleMaster.java +++ b/core/server/master/src/main/java/alluxio/master/throttle/DefaultThrottleMaster.java @@ -19,6 +19,7 @@ import
alluxio.conf.PropertyKey; import alluxio.grpc.GrpcService; import alluxio.grpc.ServiceType; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatExecutor; import alluxio.heartbeat.HeartbeatThread; @@ -109,7 +110,8 @@ public void start(Boolean isLeader) throws IOException { LOG.info("Starting {}", getName()); mThrottleService = getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_THROTTLE, mThrottleExecutor, - Configuration.getMs(PropertyKey.MASTER_THROTTLE_HEARTBEAT_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_THROTTLE_HEARTBEAT_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); LOG.info("{} is started", getName()); @@ -141,7 +143,7 @@ public ThrottleExecutor(MasterProcess masterProcess) { } @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { mSystemMonitor.run(); } diff --git a/core/server/master/src/test/java/alluxio/master/AlluxioMasterProcessTest.java b/core/server/master/src/test/java/alluxio/master/AlluxioMasterProcessTest.java index 3740f135f9d7..2ef1438ad8fb 100644 --- a/core/server/master/src/test/java/alluxio/master/AlluxioMasterProcessTest.java +++ b/core/server/master/src/test/java/alluxio/master/AlluxioMasterProcessTest.java @@ -127,7 +127,10 @@ public void startStopPrimary() throws Exception { } }); t.start(); + master.waitForReady(10_000); startStopTest(master); + t.interrupt(); + t.join(); } @Test @@ -256,6 +259,31 @@ public void restoreFromBackupLocal() throws Exception { startStopTest(master); } + @Test + public void startStopStandbyStandbyServer() throws Exception { + Configuration.set(PropertyKey.STANDBY_MASTER_GRPC_ENABLED, true); + AlluxioMasterProcess master = + new AlluxioMasterProcess(new NoopJournalSystem(), new AlwaysStandbyPrimarySelector()); + master.registerService( + RpcServerService.Factory.create( + master.getRpcBindAddress(), master, master.getRegistry())); + master.registerService(WebServerService.Factory.create(master.getWebBindAddress(), master)); + master.registerService(MetricsService.Factory.create()); + + Thread t = new Thread(() -> { + try { + master.start(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + t.start(); + startStopTest(master, + true, + Configuration.getBoolean(PropertyKey.STANDBY_MASTER_WEB_ENABLED), + Configuration.getBoolean(PropertyKey.STANDBY_MASTER_METRICS_SINK_ENABLED)); + } + private void startStopTest(AlluxioMasterProcess master) throws Exception { startStopTest(master, true, true, true); } @@ -269,7 +297,10 @@ private void startStopTest(AlluxioMasterProcess master, boolean expectGrpcServic assertTrue(isBound(master.getRpcAddress().getPort())); assertTrue(isBound(master.getWebAddress().getPort())); if (expectGrpcServiceStarted) { - assertTrue(master.waitForGrpcServerReady(TIMEOUT_MS)); + CommonUtils.waitFor("grpc server to serve", + () -> master.mServices.stream().anyMatch(service -> service instanceof RpcServerService + && ((RpcServerService) service).isServing()), + WaitForOptions.defaults().setTimeoutMs(TIMEOUT_MS)); } if (expectWebServiceStarted) { assertTrue(master.waitForWebServerReady(TIMEOUT_MS)); diff --git a/core/server/master/src/test/java/alluxio/master/AlwaysPrimaryPrimarySelector.java b/core/server/master/src/test/java/alluxio/master/AlwaysPrimaryPrimarySelector.java new file mode 100644 index 000000000000..48a68603510d --- /dev/null +++ 
b/core/server/master/src/test/java/alluxio/master/AlwaysPrimaryPrimarySelector.java @@ -0,0 +1,63 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master; + +import alluxio.grpc.NodeState; +import alluxio.util.interfaces.Scoped; + +import java.net.InetSocketAddress; +import java.util.function.Consumer; + +/** + * A test primary selector which is always primary. + */ +public final class AlwaysPrimaryPrimarySelector implements PrimarySelector { + @Override + public void start(InetSocketAddress localAddress) { + // Nothing to do. + } + + @Override + public void stop() { + // Nothing to do. + } + + @Override + public NodeState getState() { + return NodeState.PRIMARY; + } + + @Override + public NodeState getStateUnsafe() { + return NodeState.PRIMARY; + } + + @Override + public Scoped onStateChange(Consumer<NodeState> listener) { + // State never changes. + return () -> { }; + } + + @Override + public void waitForState(NodeState state) throws InterruptedException { + switch (state) { + case PRIMARY: + return; + case STANDBY: + // Never happening + Thread.sleep(Long.MAX_VALUE); + break; + default: + throw new IllegalStateException("Unknown primary selector state: " + state); + } + } +} diff --git a/core/server/master/src/test/java/alluxio/master/MasterTestUtils.java b/core/server/master/src/test/java/alluxio/master/MasterTestUtils.java index 552fc4f05292..2b843f368343 100644 --- a/core/server/master/src/test/java/alluxio/master/MasterTestUtils.java +++ b/core/server/master/src/test/java/alluxio/master/MasterTestUtils.java @@ -50,7 +50,36 @@ public static CoreMasterContext testMasterContext(JournalSystem journalSystem) { public static CoreMasterContext testMasterContext(JournalSystem journalSystem, UserState userState) { return testMasterContext(journalSystem, userState, - HeapBlockMetaStore::new, x -> new HeapInodeStore()); + HeapBlockMetaStore::new, x -> new HeapInodeStore(), new AlwaysStandbyPrimarySelector()); + } + + /** + * @return a basic master context for the purpose of testing + * @param journalSystem a journal system to use in the context + * @param userState the user state to use in the context + * @param primarySelector the primary selector + */ + public static CoreMasterContext testMasterContext(JournalSystem journalSystem, + UserState userState, PrimarySelector primarySelector) { + return testMasterContext(journalSystem, userState, + HeapBlockMetaStore::new, x -> new HeapInodeStore(), primarySelector); + } + + /** + * @return a basic master context for the purpose of testing + * @param journalSystem a journal system to use in the context + * @param userState the user state to use in the context + * @param blockStoreFactory a factory to create {@link BlockMetaStore} + * @param inodeStoreFactory a factory to create {@link InodeStore} + */ + public static CoreMasterContext testMasterContext( + JournalSystem journalSystem, UserState userState, + BlockMetaStore.Factory blockStoreFactory, + InodeStore.Factory inodeStoreFactory + ) { + return testMasterContext( + journalSystem, userState, blockStoreFactory, 
inodeStoreFactory, new AlwaysPrimaryPrimarySelector()); } /** @@ -59,14 +88,17 @@ public static CoreMasterContext testMasterContext(JournalSystem journalSystem, * @param userState the user state to use in the context * @param blockStoreFactory a factory to create {@link BlockMetaStore} * @param inodeStoreFactory a factory to create {@link InodeStore} + * @param primarySelector the primary selector */ public static CoreMasterContext testMasterContext( JournalSystem journalSystem, UserState userState, BlockMetaStore.Factory blockStoreFactory, - InodeStore.Factory inodeStoreFactory) { + InodeStore.Factory inodeStoreFactory, + PrimarySelector primarySelector + ) { return CoreMasterContext.newBuilder() .setJournalSystem(journalSystem) - .setPrimarySelector(new AlwaysStandbyPrimarySelector()) + .setPrimarySelector(primarySelector) .setUserState(userState) .setSafeModeManager(new TestSafeModeManager()) .setBackupManager(mock(BackupManager.class)) diff --git a/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java b/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java index 900c179a48c4..d1cbb7a2a686 100644 --- a/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java +++ b/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java @@ -11,27 +11,39 @@ package alluxio.master.block; +import static alluxio.stress.rpc.TierAlias.MEM; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import alluxio.Constants; +import alluxio.client.block.options.GetWorkerReportOptions; import alluxio.clock.ManualClock; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.exception.BlockInfoException; +import alluxio.exception.ExceptionMessage; import alluxio.exception.status.NotFoundException; import alluxio.grpc.BuildVersion; import alluxio.grpc.Command; import alluxio.grpc.CommandType; +import alluxio.grpc.ConfigProperty; +import alluxio.grpc.DecommissionWorkerPOptions; import alluxio.grpc.RegisterWorkerPOptions; +import alluxio.grpc.RegisterWorkerPRequest; +import alluxio.grpc.RegisterWorkerPResponse; import alluxio.grpc.StorageList; import alluxio.grpc.WorkerLostStorageInfo; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatScheduler; import alluxio.heartbeat.ManuallyScheduleHeartbeat; +import alluxio.master.AlwaysPrimaryPrimarySelector; import alluxio.master.CoreMasterContext; import alluxio.master.MasterRegistry; import alluxio.master.MasterTestUtils; +import alluxio.master.WorkerState; +import alluxio.master.block.meta.MasterWorkerInfo; import alluxio.master.journal.JournalSystem; import alluxio.master.journal.noop.NoopJournalSystem; import alluxio.master.metrics.MetricsMaster; @@ -44,11 +56,14 @@ import alluxio.wire.BlockLocation; import alluxio.wire.WorkerInfo; import alluxio.wire.WorkerNetAddress; +import alluxio.worker.block.BlockStoreLocation; +import alluxio.worker.block.RegisterStreamer; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import io.grpc.stub.StreamObserver; import org.junit.After; import org.junit.Before; import org.junit.ClassRule; @@ -61,6 +76,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedQueue; import 
java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -72,6 +89,7 @@ * Unit tests for {@link BlockMaster}. */ public class BlockMasterTest { + public static final long CAPACITY = 20L * 1024 * 1024 * 1024; // 20GB private static final WorkerNetAddress NET_ADDRESS_1 = new WorkerNetAddress().setHost("localhost") .setRpcPort(80).setDataPort(81).setWebPort(82); private static final WorkerNetAddress NET_ADDRESS_2 = new WorkerNetAddress().setHost("localhost") @@ -81,12 +99,20 @@ public class BlockMasterTest { private static final Map<Block.BlockLocation, List<Long>> NO_BLOCKS_ON_LOCATION = ImmutableMap.of(); private static final Map<String, StorageList> NO_LOST_STORAGE = ImmutableMap.of(); + public static final Map<String, List<String>> LOST_STORAGE = + ImmutableMap.of(MEM.toString(), ImmutableList.of()); + public static final List<ConfigProperty> EMPTY_CONFIG = ImmutableList.of(); + public static final int BATCH_SIZE = 1000; + + public static final BuildVersion OLD_VERSION = BuildVersion.newBuilder().setVersion("1.0.0") + .setRevision("foobar").build(); + public static final BuildVersion NEW_VERSION = BuildVersion.newBuilder().setVersion("1.1.0") + .setRevision("foobaz").build(); private BlockMaster mBlockMaster; private MasterRegistry mRegistry; private ManualClock mClock; private ExecutorService mExecutorService; - private ExecutorService mClientExecutorService; private MetricsMaster mMetricsMaster; private List<Metric> mMetrics; @@ -113,7 +139,9 @@ public void before() throws Exception { mRegistry = new MasterRegistry(); mMetrics = Lists.newArrayList(); JournalSystem journalSystem = new NoopJournalSystem(); - CoreMasterContext masterContext = MasterTestUtils.testMasterContext(); + CoreMasterContext masterContext = MasterTestUtils.testMasterContext( + new NoopJournalSystem(), null, new AlwaysPrimaryPrimarySelector() + ); mMetricsMaster = new MetricsMasterFactory().create(mRegistry, masterContext); mClock = new ManualClock(); mExecutorService = @@ -195,7 +223,30 @@ public void countBytes() throws Exception { } @Test - public void detectLostWorkers() throws Exception { + public void detectLostWorker() throws Exception { + // Register a worker. + long worker1 = mBlockMaster.getWorkerId(NET_ADDRESS_1); + mBlockMaster.workerRegister(worker1, + ImmutableList.of(Constants.MEDIUM_MEM), + ImmutableMap.of(Constants.MEDIUM_MEM, 100L), + ImmutableMap.of(Constants.MEDIUM_MEM, 10L), + NO_BLOCKS_ON_LOCATION, + NO_LOST_STORAGE, + RegisterWorkerPOptions.getDefaultInstance()); + + // Advance the block master's clock by an hour so that worker appears lost. + mClock.setTimeMs(System.currentTimeMillis() + Constants.HOUR_MS); + + // Run the lost worker detector. + HeartbeatScheduler.execute(HeartbeatContext.MASTER_LOST_WORKER_DETECTION); + + // Make sure the worker is detected as lost. + List<WorkerInfo> info = mBlockMaster.getLostWorkersInfoList(); + assertEquals(worker1, Iterables.getOnlyElement(info).getId()); + } + + @Test + public void decommissionWorker() throws Exception { // Register a worker. long worker1 = mBlockMaster.getWorkerId(NET_ADDRESS_1); mBlockMaster.workerRegister(worker1, @@ -206,6 +257,33 @@ public void detectLostWorkers() throws Exception { NO_LOST_STORAGE, RegisterWorkerPOptions.getDefaultInstance()); + // Decommission worker + DecommissionWorkerPOptions options = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .build(); + mBlockMaster.decommissionWorker(options); + + // Make sure the worker is decommissioned.
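+ // Descriptive note: decommissioning removes the worker from the live set without marking + // it lost, which the three counters below assert.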
+ int decommissionedCount = mBlockMaster.getDecommissionedWorkerCount(); + int liveCount = mBlockMaster.getWorkerCount(); + int lostCount = mBlockMaster.getLostWorkerCount(); + assertEquals(1, decommissionedCount); + assertEquals(0, liveCount); + assertEquals(0, lostCount); + } + + @Test + public void decommissionLostWorker() throws Exception { + // Register a worker. + long worker1 = mBlockMaster.getWorkerId(NET_ADDRESS_1); + mBlockMaster.workerRegister(worker1, + ImmutableList.of(Constants.MEDIUM_MEM), + ImmutableMap.of(Constants.MEDIUM_MEM, 100L), + ImmutableMap.of(Constants.MEDIUM_MEM, 10L), + NO_BLOCKS_ON_LOCATION, + NO_LOST_STORAGE, + RegisterWorkerPOptions.getDefaultInstance()); + // Advance the block master's clock by an hour so that worker appears lost. mClock.setTimeMs(System.currentTimeMillis() + Constants.HOUR_MS); @@ -215,6 +293,514 @@ // Make sure the worker is detected as lost. List<WorkerInfo> info = mBlockMaster.getLostWorkersInfoList(); assertEquals(worker1, Iterables.getOnlyElement(info).getId()); + + // Decommission worker + DecommissionWorkerPOptions options = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .build(); + mBlockMaster.decommissionWorker(options); + + // Make sure the worker is decommissioned. + int decommissionedCount = mBlockMaster.getDecommissionedWorkerCount(); + int liveCount = mBlockMaster.getWorkerCount(); + int lostCount = mBlockMaster.getLostWorkerCount(); + assertEquals(1, decommissionedCount); + assertEquals(0, liveCount); + assertEquals(0, lostCount); + } + + @Test + public void decommissionCommitUpgradeRegister() throws Exception { + long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); + RegisterWorkerPOptions options = RegisterWorkerPOptions.newBuilder() + .setBuildVersion(OLD_VERSION).build(); + mBlockMaster.workerRegister(workerId, + ImmutableList.of(Constants.MEDIUM_MEM), + ImmutableMap.of(Constants.MEDIUM_MEM, 100L), + ImmutableMap.of(Constants.MEDIUM_MEM, 0L), + NO_BLOCKS_ON_LOCATION, + NO_LOST_STORAGE, + options); + List<WorkerInfo> liveWorkerInfo = mBlockMaster.getWorkerInfoList(); + List<WorkerInfo> allWorkerInfo = mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerInfo.size()); + assertEquals(1, allWorkerInfo.size()); + WorkerInfo w = liveWorkerInfo.get(0); + assertEquals(WorkerState.LIVE.toString(), w.getState()); + assertEquals(OLD_VERSION.getVersion(), w.getVersion()); + assertEquals(OLD_VERSION.getRevision(), w.getRevision()); + + // Decommission the worker + DecommissionWorkerPOptions decomReq = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .setCanRegisterAgain(true) + .build(); + mBlockMaster.decommissionWorker(decomReq); + List<WorkerInfo> liveWorkersAfterDecom = mBlockMaster.getWorkerInfoList(); + assertEquals(0, liveWorkersAfterDecom.size()); + List<WorkerInfo> allWorkersAfterDecom = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, allWorkersAfterDecom.size()); + WorkerInfo decomWorker = allWorkersAfterDecom.get(0); + assertEquals(WorkerState.DECOMMISSIONED.toString(), decomWorker.getState()); + assertEquals(OLD_VERSION.getVersion(), decomWorker.getVersion()); + assertEquals(OLD_VERSION.getRevision(), decomWorker.getRevision()); + + // After being decommissioned, the worker can still heartbeat to the master + Map<String, Long> memUsage = ImmutableMap.of(Constants.MEDIUM_MEM, 0L); + alluxio.grpc.Command
heartBeat = mBlockMaster.workerHeartbeat(workerId, null, memUsage, + NO_BLOCKS, NO_BLOCKS_ON_LOCATION, NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeat.getCommandType()); + + // The leftover operations on the worker can still commit blocks to the master + long blockId = 1L; + long blockLength = 100L; + mBlockMaster.commitBlock(workerId, blockLength, "MEM", "MEM", blockId, blockLength); + // The block can be found on the master + BlockInfo blockInfo = mBlockMaster.getBlockInfo(blockId); + assertNotNull(blockInfo); + assertEquals(blockInfo.getLength(), blockLength); + // Although the block can successfully commit, the available locations do not include + // the decommissioned worker, so clients will not read from that worker for that block + assertEquals(0, blockInfo.getLocations().size()); + + // Heartbeat to the master again, the master does not remove the block incorrectly + Map<String, Long> memUsageWithBlock = ImmutableMap.of(Constants.MEDIUM_MEM, blockLength); + List<Long> memBlockList = ImmutableList.of(blockId); + Block.BlockLocation memTier = Block.BlockLocation.newBuilder() + .setTier("MEM").setMediumType("MEM").setWorkerId(workerId).build(); + alluxio.grpc.Command heartBeatAgain = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeatAgain.getCommandType()); + + // The worker registers again with a higher version + RegisterWorkerPOptions upgradedWorker = RegisterWorkerPOptions.newBuilder() + .setBuildVersion(NEW_VERSION).build(); + mBlockMaster.workerRegister(workerId, + ImmutableList.of(Constants.MEDIUM_MEM), + memUsageWithBlock, + memUsageWithBlock, + ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, + upgradedWorker); + List<WorkerInfo> liveWorkerAfterRestart = mBlockMaster.getWorkerInfoList(); + List<WorkerInfo> allWorkerAfterRestart = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerAfterRestart.size()); + assertEquals(1, allWorkerAfterRestart.size()); + WorkerInfo restartedWorker = liveWorkerAfterRestart.get(0); + assertEquals(WorkerState.LIVE.toString(), restartedWorker.getState()); + assertEquals(NEW_VERSION.getVersion(), restartedWorker.getVersion()); + assertEquals(NEW_VERSION.getRevision(), restartedWorker.getRevision()); + MasterWorkerInfo upgradedWorkerInfo = mBlockMaster.getWorker(workerId); + assertEquals(1, upgradedWorkerInfo.getBlockCount()); + BlockInfo blockInfoCheckAgain = mBlockMaster.getBlockInfo(blockId); + assertNotNull(blockInfoCheckAgain); + assertEquals(blockInfoCheckAgain.getLength(), blockLength); + // The block can be found on the decommissioned worker once the worker registers + // again after the upgrade + assertEquals(1, blockInfoCheckAgain.getLocations().size()); + BlockLocation locCheckAgain = blockInfoCheckAgain.getLocations().get(0); + assertEquals(workerId, locCheckAgain.getWorkerId()); + + // Heartbeat to the master again, the master does not remove the block incorrectly + alluxio.grpc.Command heartBeatAfterUpgrade = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Nothing, heartBeatAfterUpgrade.getCommandType()); + } + + @Test + public void decommissionCommitUpgradeStreamRegister() throws Exception { + long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); + BlockMasterWorkerServiceHandler handler = new 
BlockMasterWorkerServiceHandler(mBlockMaster); + Queue<Throwable> errors = + streamRegisterWorkerWithVersion(handler, workerId, 0L, ImmutableList.of(), OLD_VERSION); + assertEquals(0, errors.size()); + + List<WorkerInfo> liveWorkerInfo = mBlockMaster.getWorkerInfoList(); + List<WorkerInfo> allWorkerInfo = mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerInfo.size()); + assertEquals(1, allWorkerInfo.size()); + WorkerInfo w = liveWorkerInfo.get(0); + assertEquals(WorkerState.LIVE.toString(), w.getState()); + assertEquals(OLD_VERSION.getVersion(), w.getVersion()); + assertEquals(OLD_VERSION.getRevision(), w.getRevision()); + + // Decommission the worker + DecommissionWorkerPOptions decomReq = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .setCanRegisterAgain(true) + .build(); + mBlockMaster.decommissionWorker(decomReq); + List<WorkerInfo> liveWorkersAfterDecom = mBlockMaster.getWorkerInfoList(); + assertEquals(0, liveWorkersAfterDecom.size()); + List<WorkerInfo> allWorkersAfterDecom = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, allWorkersAfterDecom.size()); + WorkerInfo decomWorker = allWorkersAfterDecom.get(0); + assertEquals(WorkerState.DECOMMISSIONED.toString(), decomWorker.getState()); + assertEquals(OLD_VERSION.getVersion(), decomWorker.getVersion()); + assertEquals(OLD_VERSION.getRevision(), decomWorker.getRevision()); + + // After being decommissioned, the worker can still heartbeat to the master + Map<String, Long> memUsage = ImmutableMap.of(Constants.MEDIUM_MEM, 0L); + alluxio.grpc.Command heartBeat = mBlockMaster.workerHeartbeat(workerId, null, memUsage, + NO_BLOCKS, NO_BLOCKS_ON_LOCATION, NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeat.getCommandType()); + + // The leftover operations on the worker can still commit blocks to the master + long blockId = 1L; + long blockLength = 100L; + mBlockMaster.commitBlock(workerId, blockLength, "MEM", "MEM", blockId, blockLength); + // The block can be found on the master + BlockInfo blockInfo = mBlockMaster.getBlockInfo(blockId); + assertNotNull(blockInfo); + assertEquals(blockInfo.getLength(), blockLength); + // Although the block can successfully commit, the available locations do not include + // the decommissioned worker, so clients will not read from that worker for that block + assertEquals(0, blockInfo.getLocations().size()); + + // Heartbeat to the master again, the master does not remove the block incorrectly + Map<String, Long> memUsageWithBlock = ImmutableMap.of(Constants.MEDIUM_MEM, blockLength); + List<Long> memBlockList = ImmutableList.of(blockId); + Block.BlockLocation memTier = Block.BlockLocation.newBuilder() + .setTier("MEM").setMediumType("MEM").setWorkerId(workerId).build(); + alluxio.grpc.Command heartBeatAgain = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeatAgain.getCommandType()); + + // The worker registers again with a higher version + errors = streamRegisterWorkerWithVersion(handler, workerId, blockLength, + ImmutableList.of(blockId), NEW_VERSION); + assertEquals(0, errors.size()); + List<WorkerInfo> liveWorkerAfterRestart = mBlockMaster.getWorkerInfoList(); + List<WorkerInfo> allWorkerAfterRestart = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerAfterRestart.size()); + assertEquals(1, allWorkerAfterRestart.size()); + WorkerInfo restartedWorker = 
liveWorkerAfterRestart.get(0); + assertEquals(WorkerState.LIVE.toString(), restartedWorker.getState()); + assertEquals(NEW_VERSION.getVersion(), restartedWorker.getVersion()); + assertEquals(NEW_VERSION.getRevision(), restartedWorker.getRevision()); + MasterWorkerInfo upgradedWorkerInfo = mBlockMaster.getWorker(workerId); + assertEquals(1, upgradedWorkerInfo.getBlockCount()); + BlockInfo blockInfoCheckAgain = mBlockMaster.getBlockInfo(blockId); + assertNotNull(blockInfoCheckAgain); + assertEquals(blockInfoCheckAgain.getLength(), blockLength); + // The block can be found on the decommissioned worker once the worker registers + // again after the upgrade + assertEquals(1, blockInfoCheckAgain.getLocations().size()); + BlockLocation locCheckAgain = blockInfoCheckAgain.getLocations().get(0); + assertEquals(workerId, locCheckAgain.getWorkerId()); + + // Heartbeat to the master again, the master does not remove the block incorrectly + alluxio.grpc.Command heartBeatAfterUpgrade = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Nothing, heartBeatAfterUpgrade.getCommandType()); + } + + @Test + public void decommissionRemoveUpgradeStreamRegister() throws Exception { + long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); + BlockMasterWorkerServiceHandler handler = new BlockMasterWorkerServiceHandler(mBlockMaster); + + // Sequence to simulate worker upgrade and downgrade, + // with or without buildVersion in registerWorkerPOptions + Queue<Throwable> errors = streamRegisterWorkerWithVersion(handler, workerId, 0L, + ImmutableList.of(), OLD_VERSION); + assertEquals(0, errors.size()); + List<WorkerInfo> liveWorkerInfo = mBlockMaster.getWorkerInfoList(); + List<WorkerInfo> allWorkerInfo = mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerInfo.size()); + assertEquals(1, allWorkerInfo.size()); + WorkerInfo w = liveWorkerInfo.get(0); + assertEquals(WorkerState.LIVE.toString(), w.getState()); + assertEquals(OLD_VERSION.getVersion(), w.getVersion()); + assertEquals(OLD_VERSION.getRevision(), w.getRevision()); + + // Prepare a block for removal + long blockId = 1L; + long blockLength = 100L; + mBlockMaster.commitBlock(workerId, blockLength, "MEM", "MEM", blockId, blockLength); + + // Decommission the worker + DecommissionWorkerPOptions decomReq = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .setCanRegisterAgain(true) + .build(); + mBlockMaster.decommissionWorker(decomReq); + List<WorkerInfo> liveWorkersAfterDecom = mBlockMaster.getWorkerInfoList(); + assertEquals(0, liveWorkersAfterDecom.size()); + List<WorkerInfo> allWorkersAfterDecom = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, allWorkersAfterDecom.size()); + WorkerInfo decomWorker = allWorkersAfterDecom.get(0); + assertEquals(WorkerState.DECOMMISSIONED.toString(), decomWorker.getState()); + assertEquals(OLD_VERSION.getVersion(), decomWorker.getVersion()); + assertEquals(OLD_VERSION.getRevision(), decomWorker.getRevision()); + + // After being decommissioned, the worker can still heartbeat to the master + Map<String, Long> memUsage = ImmutableMap.of(Constants.MEDIUM_MEM, 0L); + alluxio.grpc.Command heartBeat = mBlockMaster.workerHeartbeat(workerId, null, memUsage, + NO_BLOCKS, NO_BLOCKS_ON_LOCATION, NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeat.getCommandType()); + + // Remove the block from the master and 
workers + mBlockMaster.removeBlocks(ImmutableList.of(blockId), true); + Exception e = assertThrows(BlockInfoException.class, () -> { + BlockInfo shouldNotExist = mBlockMaster.getBlockInfo(blockId); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.BLOCK_META_NOT_FOUND.getMessage(blockId))); + + // Heartbeat to the master again, the master does nothing about the block + Map<String, Long> memUsageWithBlock = ImmutableMap.of(Constants.MEDIUM_MEM, blockLength); + List<Long> memBlockList = ImmutableList.of(blockId); + Block.BlockLocation memTier = Block.BlockLocation.newBuilder() + .setTier("MEM").setMediumType("MEM").setWorkerId(workerId).build(); + alluxio.grpc.Command heartBeatAgain = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeatAgain.getCommandType()); + + // The worker registers again with a higher version + errors = streamRegisterWorkerWithVersion(handler, workerId, blockLength, + ImmutableList.of(blockId), NEW_VERSION); + assertEquals(0, errors.size()); + List<WorkerInfo> liveWorkerAfterRestart = mBlockMaster.getWorkerInfoList(); + List<WorkerInfo> allWorkerAfterRestart = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerAfterRestart.size()); + assertEquals(1, allWorkerAfterRestart.size()); + WorkerInfo restartedWorker = liveWorkerAfterRestart.get(0); + assertEquals(WorkerState.LIVE.toString(), restartedWorker.getState()); + assertEquals(NEW_VERSION.getVersion(), restartedWorker.getVersion()); + assertEquals(NEW_VERSION.getRevision(), restartedWorker.getRevision()); + MasterWorkerInfo upgradedWorkerInfo = mBlockMaster.getWorker(workerId); + // The block should not be recognized and therefore the master will want to remove that block + assertEquals(0, upgradedWorkerInfo.getBlockCount()); + assertEquals(1, upgradedWorkerInfo.getToRemoveBlockCount()); + + // Heartbeat to the master again, the master does not remove the block incorrectly + alluxio.grpc.Command heartBeatAfterUpgrade = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Free, heartBeatAfterUpgrade.getCommandType()); + assertEquals(ImmutableList.of(blockId), heartBeatAfterUpgrade.getDataList()); + } + + @Test + public void decommissionRemoveUpgradeRegister() throws Exception { + long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); + + // Sequence to simulate worker upgrade and downgrade, + // with or without buildVersion in registerWorkerPOptions + RegisterWorkerPOptions options = RegisterWorkerPOptions.newBuilder() + .setBuildVersion(OLD_VERSION).build(); + + mBlockMaster.workerRegister(workerId, + ImmutableList.of(Constants.MEDIUM_MEM), + ImmutableMap.of(Constants.MEDIUM_MEM, 100L), + ImmutableMap.of(Constants.MEDIUM_MEM, 0L), + NO_BLOCKS_ON_LOCATION, + NO_LOST_STORAGE, + options); + List<WorkerInfo> liveWorkerInfo = mBlockMaster.getWorkerInfoList(); + List<WorkerInfo> allWorkerInfo = mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerInfo.size()); + assertEquals(1, allWorkerInfo.size()); + WorkerInfo w = liveWorkerInfo.get(0); + assertEquals(WorkerState.LIVE.toString(), w.getState()); + assertEquals(OLD_VERSION.getVersion(), w.getVersion()); + assertEquals(OLD_VERSION.getRevision(), w.getRevision()); + + // Prepare a block for removal + long blockId = 1L; + long blockLength = 100L; + mBlockMaster.commitBlock(workerId, 
blockLength, "MEM", "MEM", blockId, blockLength); + + // Decommission the worker + DecommissionWorkerPOptions decomReq = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .setCanRegisterAgain(true) + .build(); + mBlockMaster.decommissionWorker(decomReq); + List liveWorkersAfterDecom = mBlockMaster.getWorkerInfoList(); + assertEquals(0, liveWorkersAfterDecom.size()); + List allWorkersAfterDecom = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, allWorkersAfterDecom.size()); + WorkerInfo decomWorker = allWorkersAfterDecom.get(0); + assertEquals(WorkerState.DECOMMISSIONED.toString(), decomWorker.getState()); + assertEquals(OLD_VERSION.getVersion(), decomWorker.getVersion()); + assertEquals(OLD_VERSION.getRevision(), decomWorker.getRevision()); + + // After decommissioned, the worker can still heartbeat to the master + Map memUsage = ImmutableMap.of(Constants.MEDIUM_MEM, 0L); + alluxio.grpc.Command heartBeat = mBlockMaster.workerHeartbeat(workerId, null, memUsage, + NO_BLOCKS, NO_BLOCKS_ON_LOCATION, NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeat.getCommandType()); + + // Remove the block from the master and workers + mBlockMaster.removeBlocks(ImmutableList.of(blockId), true); + Exception e = assertThrows(BlockInfoException.class, () -> { + BlockInfo shouldNotExist = mBlockMaster.getBlockInfo(blockId); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.BLOCK_META_NOT_FOUND.getMessage(blockId))); + + // Heartbeat to the master again, the master does nothing about the block + Map memUsageWithBlock = ImmutableMap.of(Constants.MEDIUM_MEM, blockLength); + List memBlockList = ImmutableList.of(blockId); + Block.BlockLocation memTier = Block.BlockLocation.newBuilder() + .setTier("MEM").setMediumType("MEM").setWorkerId(workerId).build(); + alluxio.grpc.Command heartBeatAgain = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeatAgain.getCommandType()); + + // The worker registers again with a higher version + RegisterWorkerPOptions upgradedWorker = RegisterWorkerPOptions.newBuilder() + .setBuildVersion(NEW_VERSION).build(); + mBlockMaster.workerRegister(workerId, + ImmutableList.of(Constants.MEDIUM_MEM), + memUsageWithBlock, + memUsageWithBlock, + ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, + upgradedWorker); + List liveWorkerAfterRestart = mBlockMaster.getWorkerInfoList(); + List allWorkerAfterRestart = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerAfterRestart.size()); + assertEquals(1, allWorkerAfterRestart.size()); + WorkerInfo restartedWorker = liveWorkerAfterRestart.get(0); + assertEquals(WorkerState.LIVE.toString(), restartedWorker.getState()); + assertEquals(NEW_VERSION.getVersion(), restartedWorker.getVersion()); + assertEquals(NEW_VERSION.getRevision(), restartedWorker.getRevision()); + MasterWorkerInfo upgradedWorkerInfo = mBlockMaster.getWorker(workerId); + // The block should not be recognized and therefore the master will want to remove that block + assertEquals(0, upgradedWorkerInfo.getBlockCount()); + assertEquals(1, upgradedWorkerInfo.getToRemoveBlockCount()); + + // Heartbeat to the master again, the master does not remove the block incorrectly + alluxio.grpc.Command heartBeatAfterUpgrade = 
mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Free, heartBeatAfterUpgrade.getCommandType()); + assertEquals(ImmutableList.of(blockId), heartBeatAfterUpgrade.getDataList()); + } + + public static Queue<Throwable> streamRegisterWorkerWithVersion( + BlockMasterWorkerServiceHandler handler, + long workerId, long blockSize, List<Long> blockList, BuildVersion version) { + List<RegisterWorkerPRequest> requests = generateRegisterStreamForWorkerWithVersion( + workerId, blockSize, blockList, version); + Queue<Throwable> errorQueue = new ConcurrentLinkedQueue<>(); + sendStreamToMaster(handler, requests, getErrorCapturingResponseObserver(errorQueue)); + return errorQueue; + } + + public static List<RegisterWorkerPRequest> generateRegisterStreamForWorkerWithVersion( + long workerId, long blockSize, List<Long> blockList, BuildVersion version) { + Map<BlockStoreLocation, List<Long>> blockMap = new HashMap<>(); + BlockStoreLocation mem = new BlockStoreLocation("MEM", 0, "MEM"); + blockMap.put(mem, blockList); + + // We just use the RegisterStreamer to generate the batch of requests + RegisterStreamer registerStreamer = new RegisterStreamer(null, + workerId, ImmutableList.of("MEM"), + ImmutableMap.of("MEM", CAPACITY), // capacity + ImmutableMap.of("MEM", blockSize * blockList.size()), // usage + blockMap, LOST_STORAGE, EMPTY_CONFIG, version); + + // Get chunks from the RegisterStreamer + return ImmutableList.copyOf(registerStreamer); + } + + public static StreamObserver<RegisterWorkerPResponse> getErrorCapturingResponseObserver( + Queue<Throwable> errorQueue) { + return new StreamObserver<RegisterWorkerPResponse>() { + @Override + public void onNext(RegisterWorkerPResponse response) {} + + @Override + public void onError(Throwable t) { + errorQueue.offer(t); + } + + @Override + public void onCompleted() {} + }; + } + + public static void sendStreamToMaster(BlockMasterWorkerServiceHandler handler, + List<RegisterWorkerPRequest> requestChunks, + StreamObserver<RegisterWorkerPResponse> responseObserver) { + StreamObserver<RegisterWorkerPRequest> requestObserver = + handler.registerWorkerStream(responseObserver); + for (RegisterWorkerPRequest chunk : requestChunks) { + requestObserver.onNext(chunk); + } + requestObserver.onCompleted(); + } + + @Test + public void streamRegDecommissionUpgradeStreamReg() throws Exception { + long worker1 = mBlockMaster.getWorkerId(NET_ADDRESS_1); + + // Sequence to simulate worker upgrade and downgrade, + // with or without buildVersion in registerWorkerPOptions + BuildVersion oldVersion = BuildVersion.newBuilder().setVersion("1.0.0") + .setRevision("abc").build(); + BuildVersion newVersion = BuildVersion.newBuilder().setVersion("1.1.0") + .setRevision("def").build(); + + BlockMasterWorkerServiceHandler handler = new BlockMasterWorkerServiceHandler(mBlockMaster); + Queue<Throwable> errors = streamRegisterWorkerWithVersion(handler, worker1, 64 * Constants.MB, + ImmutableList.of(), oldVersion); + assertEquals(0, errors.size()); + + List<WorkerInfo> availableWorkerList = mBlockMaster.getWorkerInfoList(); + assertEquals(1, availableWorkerList.size()); + assertEquals(1, mBlockMaster.getWorkerCount()); + assertEquals(0, mBlockMaster.getLostWorkerCount()); + assertEquals(0, mBlockMaster.getDecommissionedWorkerCount()); + assertEquals(oldVersion.getVersion(), availableWorkerList.get(0).getVersion()); + assertEquals(oldVersion.getRevision(), availableWorkerList.get(0).getRevision()); + + // Decommission the worker + DecommissionWorkerPOptions decomReq = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .setCanRegisterAgain(true) + .build(); + 
mBlockMaster.decommissionWorker(decomReq); + assertEquals(0, mBlockMaster.getWorkerCount()); + assertEquals(0, mBlockMaster.getLostWorkerCount()); + assertEquals(1, mBlockMaster.getDecommissionedWorkerCount()); + List<WorkerInfo> workerReport = mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(oldVersion.getVersion(), workerReport.get(0).getVersion()); + assertEquals(oldVersion.getRevision(), workerReport.get(0).getRevision()); + + // Worker is restarted with a newer version + errors = streamRegisterWorkerWithVersion(handler, worker1, 64 * Constants.MB, + ImmutableList.of(), newVersion); + assertEquals(0, errors.size()); + assertEquals(1, mBlockMaster.getWorkerCount()); + assertEquals(0, mBlockMaster.getLostWorkerCount()); + assertEquals(0, mBlockMaster.getDecommissionedWorkerCount()); + List<WorkerInfo> availableWorkerListNow = mBlockMaster.getWorkerInfoList(); + assertEquals(newVersion.getVersion(), availableWorkerListNow.get(0).getVersion()); + assertEquals(newVersion.getRevision(), availableWorkerListNow.get(0).getRevision()); + } + + private GetWorkerReportOptions createGetWorkerReportOptions() { + GetWorkerReportOptions getReportOptions = GetWorkerReportOptions.defaults(); + getReportOptions.setFieldRange(GetWorkerReportOptions.WorkerInfoField.ALL); + getReportOptions.setWorkerRange(GetWorkerReportOptions.WorkerRange.ALL); + return getReportOptions; } @Test diff --git a/core/server/master/src/test/java/alluxio/master/block/BlockMasterWorkerServiceHandlerTest.java b/core/server/master/src/test/java/alluxio/master/block/BlockMasterWorkerServiceHandlerTest.java index 2de7f5a1e16d..20b0af9ea91f 100644 --- a/core/server/master/src/test/java/alluxio/master/block/BlockMasterWorkerServiceHandlerTest.java +++ b/core/server/master/src/test/java/alluxio/master/block/BlockMasterWorkerServiceHandlerTest.java @@ -28,9 +28,11 @@ import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.RegisterWorkerPRequest; import alluxio.grpc.RegisterWorkerPResponse; +import alluxio.master.AlwaysPrimaryPrimarySelector; import alluxio.master.CoreMasterContext; import alluxio.master.MasterRegistry; import alluxio.master.MasterTestUtils; +import alluxio.master.journal.noop.NoopJournalSystem; import alluxio.master.metrics.MetricsMaster; import alluxio.master.metrics.MetricsMasterFactory; import alluxio.util.SleepUtils; @@ -84,7 +86,8 @@ public void initServiceHandler(boolean leaseEnabled) throws Exception { } mRegistry = new MasterRegistry(); - CoreMasterContext masterContext = MasterTestUtils.testMasterContext(); + CoreMasterContext masterContext = MasterTestUtils.testMasterContext(new NoopJournalSystem(), + null, new AlwaysPrimaryPrimarySelector()); mMetricsMaster = new MetricsMasterFactory().create(mRegistry, masterContext); mClock = new ManualClock(); mExecutorService = diff --git a/core/server/master/src/test/java/alluxio/master/block/ConcurrentBlockMasterTest.java b/core/server/master/src/test/java/alluxio/master/block/ConcurrentBlockMasterTest.java index db88d22011ec..837546568249 100644 --- a/core/server/master/src/test/java/alluxio/master/block/ConcurrentBlockMasterTest.java +++ b/core/server/master/src/test/java/alluxio/master/block/ConcurrentBlockMasterTest.java @@ -27,9 +27,11 @@ import alluxio.grpc.StorageList; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.ManuallyScheduleHeartbeat; +import alluxio.master.AlwaysPrimaryPrimarySelector; import alluxio.master.CoreMasterContext; import alluxio.master.MasterRegistry; import alluxio.master.MasterTestUtils; +import
alluxio.master.journal.noop.NoopJournalSystem; import alluxio.master.metrics.MetricsMaster; import alluxio.master.metrics.MetricsMasterFactory; import alluxio.proto.meta.Block; @@ -109,7 +111,9 @@ public class ConcurrentBlockMasterTest { @Before public void before() throws Exception { mRegistry = new MasterRegistry(); - mMasterContext = MasterTestUtils.testMasterContext(); + mMasterContext = MasterTestUtils.testMasterContext( + new NoopJournalSystem(), null, new AlwaysPrimaryPrimarySelector() + ); mMetricsMaster = new MetricsMasterFactory().create(mRegistry, mMasterContext); mClock = new ManualClock(); mExecutorService = diff --git a/core/server/master/src/test/java/alluxio/master/block/DefaultBlockMasterCheckpointTest.java b/core/server/master/src/test/java/alluxio/master/block/DefaultBlockMasterCheckpointTest.java new file mode 100644 index 000000000000..e49963eb88cd --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/block/DefaultBlockMasterCheckpointTest.java @@ -0,0 +1,125 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.block; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.BlockInfoException; +import alluxio.master.CoreMasterContext; +import alluxio.master.MasterRegistry; +import alluxio.master.MasterTestUtils; +import alluxio.master.MasterUtils; +import alluxio.master.journal.checkpoint.CheckpointInputStream; +import alluxio.master.journal.noop.NoopJournalSystem; +import alluxio.master.metastore.MetastoreType; +import alluxio.master.metrics.MetricsMaster; +import alluxio.master.metrics.MetricsMasterFactory; +import alluxio.proto.journal.Block; +import alluxio.proto.journal.Journal; +import alluxio.wire.BlockInfo; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +@RunWith(Parameterized.class) +public class DefaultBlockMasterCheckpointTest { + @Parameterized.Parameters + public static Collection<MetastoreType> data() { + return Arrays.asList(MetastoreType.HEAP, MetastoreType.ROCKS); + } + + @Parameterized.Parameter + public MetastoreType mMetastoreType; + + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + private DefaultBlockMaster mBlockMaster; + + private final long mNextContainerId = 1; + private final long mBlockId1 = 2; + private final long mBlockId2 = 3; + private final long mBlockLength = 4; + + private DefaultBlockMaster createDefaultBlockMaster() throws IOException { + CoreMasterContext context = MasterTestUtils.testMasterContext(new NoopJournalSystem(), null, + MasterUtils.getBlockStoreFactory(mFolder.newFolder().getAbsolutePath()), +
MasterUtils.getInodeStoreFactory(mFolder.newFolder().getAbsolutePath())); + MetricsMasterFactory metricsMasterFactory = new MetricsMasterFactory(); + MetricsMaster metricsMaster = metricsMasterFactory.create(new MasterRegistry(), context); + return new DefaultBlockMaster(metricsMaster, context); + } + + @Before + public void before() throws IOException { + Configuration.set(PropertyKey.MASTER_BLOCK_METASTORE, mMetastoreType); + mBlockMaster = createDefaultBlockMaster(); + mBlockMaster.processJournalEntry(Journal.JournalEntry.newBuilder() + .setBlockContainerIdGenerator(Block.BlockContainerIdGeneratorEntry.newBuilder() + .setNextContainerId(mNextContainerId)).build()); + mBlockMaster.processJournalEntry(Journal.JournalEntry.newBuilder() + .setBlockInfo(Block.BlockInfoEntry.newBuilder() + .setBlockId(mBlockId1)).build()); + mBlockMaster.processJournalEntry(Journal.JournalEntry.newBuilder() + .setBlockInfo(Block.BlockInfoEntry.newBuilder() + .setBlockId(mBlockId2) + .setLength(mBlockLength)).build()); + mBlockMaster.processJournalEntry(Journal.JournalEntry.newBuilder() + .setDeleteBlock(Block.DeleteBlockEntry.newBuilder() + .setBlockId(mBlockId1)).build()); + } + + @Test + public void testOutputStream() throws IOException, InterruptedException, BlockInfoException { + File file = mFolder.newFile(); + try (OutputStream outputStream = Files.newOutputStream(file.toPath())) { + mBlockMaster.writeToCheckpoint(outputStream); + } + DefaultBlockMaster blockMaster = createDefaultBlockMaster(); + try (CheckpointInputStream inputStream = + new CheckpointInputStream(Files.newInputStream(file.toPath()))) { + blockMaster.restoreFromCheckpoint(inputStream); + } + Assert.assertEquals(mNextContainerId, blockMaster.getJournaledNextContainerId()); + Assert.assertThrows(BlockInfoException.class, () -> blockMaster.getBlockInfo(mBlockId1)); + BlockInfo blockInfo = blockMaster.getBlockInfo(mBlockId2); + Assert.assertEquals(mBlockLength, blockInfo.getLength()); + } + + @Test + public void testDirectory() throws IOException, BlockInfoException { + File dir = mFolder.newFolder(); + ExecutorService executor = Executors.newSingleThreadExecutor(); + mBlockMaster.writeToCheckpoint(dir, executor).join(); + DefaultBlockMaster blockMaster = createDefaultBlockMaster(); + blockMaster.restoreFromCheckpoint(dir, executor).join(); + + Assert.assertEquals(mNextContainerId, blockMaster.getJournaledNextContainerId()); + Assert.assertThrows(BlockInfoException.class, () -> blockMaster.getBlockInfo(mBlockId1)); + BlockInfo blockInfo = blockMaster.getBlockInfo(mBlockId2); + Assert.assertEquals(mBlockLength, blockInfo.getLength()); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/block/meta/MasterWorkerInfoTest.java b/core/server/master/src/test/java/alluxio/master/block/meta/MasterWorkerInfoTest.java index 25f76f7ea64d..e5bc773cce51 100644 --- a/core/server/master/src/test/java/alluxio/master/block/meta/MasterWorkerInfoTest.java +++ b/core/server/master/src/test/java/alluxio/master/block/meta/MasterWorkerInfoTest.java @@ -19,6 +19,7 @@ import alluxio.DefaultStorageTierAssoc; import alluxio.StorageTierAssoc; import alluxio.client.block.options.GetWorkerReportOptions; +import alluxio.master.WorkerState; import alluxio.wire.WorkerInfo; import alluxio.wire.WorkerNetAddress; @@ -139,10 +140,10 @@ public void blockOperation() { @Test public void workerInfoGeneration() { WorkerInfo workerInfo = mInfo.generateWorkerInfo(GetWorkerReportOptions.WorkerInfoField.ALL, - true); + WorkerState.LIVE); assertEquals(mInfo.getId(), 
workerInfo.getId()); assertEquals(mInfo.getWorkerAddress(), workerInfo.getAddress()); - assertEquals("In Service", workerInfo.getState()); + assertEquals(WorkerState.LIVE.toString(), workerInfo.getState()); assertEquals(mInfo.getCapacityBytes(), workerInfo.getCapacityBytes()); assertEquals(mInfo.getUsedBytes(), workerInfo.getUsedBytes()); assertEquals(mInfo.getStartTime(), workerInfo.getStartTimeMs()); diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemJournalEntryMergerTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemJournalEntryMergerTest.java index d2c1e77bdb80..746404a26b6e 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemJournalEntryMergerTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemJournalEntryMergerTest.java @@ -13,6 +13,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import alluxio.AlluxioURI; import alluxio.master.block.BlockId; @@ -112,4 +113,38 @@ public void testFileSystemJournalEntryMerger() { merger.clear(); assertEquals(0, merger.getMergedJournalEntries().size()); } + + @Test + public void testMergeDirectoryFingerprint() { + AlluxioURI uri = new AlluxioURI("/dir/test1"); + + FileSystemJournalEntryMerger merger = new FileSystemJournalEntryMerger(); + + merger.add(Journal.JournalEntry.newBuilder().setInodeDirectory( + File.InodeDirectoryEntry.newBuilder().setId(1).setParentId(0) + .setPersistenceState(PersistenceState.PERSISTED.name()) + .setName("test_dir").setPath("test_dir").build()).build()); + + merger.add(Journal.JournalEntry.newBuilder().setUpdateInodeDirectory( + File.UpdateInodeDirectoryEntry.newBuilder().setId(1) + .setDirectChildrenLoaded(true).build()).build()); + + merger.add(Journal.JournalEntry.newBuilder().setUpdateInode( + File.UpdateInodeEntry.newBuilder().setId(1) + .setName("test_dir_updated") + .setUfsFingerprint("fingerprint") + .build()).build()); + + List<Journal.JournalEntry> entries = merger.getMergedJournalEntries(); + Journal.JournalEntry entry = entries.get(0); + assertNotNull(entry.getInodeDirectory()); + assertEquals(1, entry.getInodeDirectory().getId()); + assertEquals("test_dir_updated", entry.getInodeDirectory().getName()); + assertEquals("test_dir", entry.getInodeDirectory().getPath()); + assertTrue(entry.getInodeDirectory().getDirectChildrenLoaded()); + + Journal.JournalEntry entry2 = entries.get(1); + assertNotNull(entry2.getUpdateInode()); + assertEquals("fingerprint", entry2.getUpdateInode().getUfsFingerprint()); + } } diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterFsOptsTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterFsOptsTest.java index 4eacaa95b0cc..62f93ecf559d 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterFsOptsTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterFsOptsTest.java @@ -14,6 +14,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -105,6 +106,34 @@ public void createFileUsesOperationTime() throws Exception { assertEquals(100, info.getLastAccessTimeMs()); } + + @Test + public void createFileWithOverwrite() throws Exception { + AlluxioURI path = new AlluxioURI("/test"); +
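// Create the file once so the create calls below exercise the overwrite flag. +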
mFileSystemMaster.createFile(path, CreateFileContext.defaults()); + // create without overwrite + Exception e = assertThrows(FileAlreadyExistsException.class, () -> { + mFileSystemMaster.createFile(path, CreateFileContext.defaults()); + }); + assertTrue(e.getMessage() + .contains(ExceptionMessage.CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE.getMessage(path))); + + // create with overwrite + CreateFileContext createFileContextWithOverwrite = CreateFileContext.defaults(); + createFileContextWithOverwrite.getOptions().setOverwrite(true); + mFileSystemMaster.createFile(path, createFileContextWithOverwrite); + FileInfo info = mFileSystemMaster.getFileInfo(path, GetStatusContext.defaults()); + + // overwrite an existing directory + AlluxioURI testpath = new AlluxioURI("/test2"); + mFileSystemMaster.createDirectory(testpath, CreateDirectoryContext.defaults()); + + e = assertThrows(FileAlreadyExistsException.class, () -> { + mFileSystemMaster.createFile(testpath, createFileContextWithOverwrite); + }); + assertTrue(e.getMessage() + .contains(ExceptionMessage.CANNOT_OVERWRITE_DIRECTORY.getMessage(testpath))); + } + /** * Tests the {@link FileSystemMaster#delete(AlluxioURI, DeleteContext)} method. */ diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterS3UfsTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterS3UfsTest.java new file mode 100644 index 000000000000..dcde599c818c --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterS3UfsTest.java @@ -0,0 +1,111 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file; + +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.AccessControlException; +import alluxio.exception.BlockInfoException; +import alluxio.exception.FileAlreadyCompletedException; +import alluxio.exception.FileAlreadyExistsException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidFileSizeException; +import alluxio.exception.InvalidPathException; +import alluxio.master.file.contexts.ExistsContext; +import alluxio.master.file.contexts.MountContext; + +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Regions; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import org.gaul.s3proxy.junit.S3ProxyRule; +import org.junit.Ignore; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +/** + * Unit tests for {@link FileSystemMaster}.
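+ * The S3 UFS is emulated with an in-process S3Proxy instance.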
+ */ +public final class FileSystemMasterS3UfsTest extends FileSystemMasterTestBase { + private static final Logger LOG = LoggerFactory.getLogger(FileSystemMasterS3UfsTest.class); + private static final String TEST_BUCKET = "test-bucket"; + private static final String TEST_FILE = "test_file"; + private static final String TEST_DIRECTORY = "test_directory"; + private static final String TEST_CONTENT = "test_content"; + private static final AlluxioURI UFS_ROOT = new AlluxioURI("s3://test-bucket/"); + private static final AlluxioURI MOUNT_POINT = new AlluxioURI("/s3_mount"); + private AmazonS3 mS3Client; + @Rule + public S3ProxyRule mS3Proxy = S3ProxyRule.builder() + .withPort(8001) + .withCredentials("_", "_") + .build(); + + @Override + public void before() throws Exception { + Configuration.set(PropertyKey.UNDERFS_S3_ENDPOINT, "localhost:8001"); + Configuration.set(PropertyKey.UNDERFS_S3_ENDPOINT_REGION, "us-west-2"); + Configuration.set(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS, true); + Configuration.set(PropertyKey.S3A_ACCESS_KEY, mS3Proxy.getAccessKey()); + Configuration.set(PropertyKey.S3A_SECRET_KEY, mS3Proxy.getSecretKey()); + + mS3Client = AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withCredentials( + new AWSStaticCredentialsProvider( + new BasicAWSCredentials(mS3Proxy.getAccessKey(), mS3Proxy.getSecretKey()))) + .withEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration(mS3Proxy.getUri().toString(), + Regions.US_WEST_2.getName())) + .build(); + mS3Client.createBucket(TEST_BUCKET); + + super.before(); + } + + @Ignore + @Test + public void basicWrite() + throws FileDoesNotExistException, FileAlreadyExistsException, AccessControlException, + IOException, InvalidPathException, BlockInfoException, InvalidFileSizeException, + FileAlreadyCompletedException { + // Not testable: + // when a directory is created, nothing is created correspondingly in S3; + // when a file is created, the client must open it and write the content, + // which is out of the scope of this test.
+ } + + @Test + public void basicSync() + throws FileDoesNotExistException, FileAlreadyExistsException, AccessControlException, + IOException, InvalidPathException { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join(TEST_FILE), ExistsContext.defaults())); + } + + @Override + public void after() throws Exception { + mS3Client = null; + super.after(); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataConcurrentTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataConcurrentTest.java index 1ea48f3bffbe..824f65184c92 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataConcurrentTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataConcurrentTest.java @@ -162,6 +162,23 @@ public void syncTheSameDirectoryButTheSecondCallCancelled() throws Exception { assertEquals(InodeSyncStream.SyncStatus.OK, iss3.sync()); } + @Test + public void syncWhenShouldSyncIsSetTrue() throws Exception { + Supplier<InodeSyncStream> inodeSyncStreamSupplier = () -> new InodeSyncStream( + new LockingScheme( + new AlluxioURI("/"), InodeTree.LockPattern.READ, true), + mFileSystemMaster, mFileSystemMaster.getSyncPathCache(), + RpcContext.NOOP, DescendantType.ALL, FileSystemMasterCommonPOptions.getDefaultInstance(), + false, + false, + false); + + InodeSyncStream iss1 = inodeSyncStreamSupplier.get(); + InodeSyncStream iss2 = inodeSyncStreamSupplier.get(); + assertSyncHappenTwice(syncConcurrent(iss1, iss2)); + assertSyncHappenTwice(syncSequential(inodeSyncStreamSupplier, inodeSyncStreamSupplier)); + } + private void assertTheSecondSyncSkipped( Pair<InodeSyncStream.SyncStatus, InodeSyncStream.SyncStatus> results) { assertEquals(InodeSyncStream.SyncStatus.OK, results.getFirst()); diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataTest.java index fe4cf44cc9fc..03d1d64e6b80 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataTest.java @@ -26,10 +26,13 @@ import alluxio.exception.FileDoesNotExistException; import alluxio.exception.InvalidPathException; import alluxio.file.options.DescendantType; +import alluxio.grpc.CompleteFilePOptions; +import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.DeletePOptions; import alluxio.grpc.FileSystemMasterCommonPOptions; import alluxio.grpc.GetStatusPOptions; import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.WritePType; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.ManuallyScheduleHeartbeat; import alluxio.master.CoreMasterContext; @@ -38,7 +41,9 @@ import alluxio.master.MasterTestUtils; import alluxio.master.block.BlockMaster; import alluxio.master.block.BlockMasterFactory; +import alluxio.master.file.contexts.CompleteFileContext; import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.CreateFileContext; import alluxio.master.file.contexts.DeleteContext; import alluxio.master.file.contexts.GetStatusContext; import alluxio.master.file.contexts.ListStatusContext; @@ -54,6 +59,7 @@ import alluxio.underfs.Fingerprint; import alluxio.underfs.UfsDirectoryStatus; import alluxio.underfs.UfsFileStatus; +import
alluxio.underfs.UfsMode; import alluxio.underfs.UfsStatus; import alluxio.underfs.UnderFileSystem; import alluxio.util.IdUtils; @@ -126,6 +132,58 @@ public void after() throws Exception { stopServices(); } + @Test + public void completeFileWithOutOfDateHash() throws Exception { + // In this test we want to simulate a concurrent write to the UFS + // while the file is being created in Alluxio. + // When creating the file in Alluxio, we will use the fingerprint + // of the created file, and not the one on the UFS. + // Thus, when performing a metadata sync there should be a fingerprint + // mismatch. + AlluxioURI ufsMount = setupMockUfsS3Mount(); + String fname = "file"; + AlluxioURI uri = new AlluxioURI("/mnt/local/" + fname); + + // The fingerprint of the file created in Alluxio + String alluxioContentHash = "hashOnComplete"; + // The fingerprint of the file in the UFS + String ufsContentHash = "ufsHash"; + + AlluxioURI filePath = ufsMount.join("file"); + UfsFileStatus fileStatus = new UfsFileStatus( + "file", ufsContentHash, 0L, System.currentTimeMillis(), + "owner1", "owner1", (short) 777, null, 100L); + Mockito.doAnswer(invocation -> + Fingerprint.create("s3", fileStatus, + invocation.getArgument(1))).when(mUfs).getParsedFingerprint( + eq(filePath.toString()), anyString()); + Mockito.doAnswer(invocation -> + Fingerprint.create("s3", fileStatus)) + .when(mUfs).getParsedFingerprint( + eq(filePath.toString())); + Mockito.when(mUfs.exists(filePath.toString())).thenReturn(true); + Mockito.when(mUfs.isDirectory(filePath.toString())).thenReturn(false); + Mockito.when(mUfs.isFile(filePath.toString())).thenReturn(true); + Mockito.when(mUfs.getStatus(filePath.toString())).thenReturn(fileStatus); + Mockito.when(mUfs.getOperationMode(any())).thenReturn(UfsMode.READ_WRITE); + + mFileSystemMaster.createFile(uri, CreateFileContext.mergeFrom(CreateFilePOptions + .newBuilder().setWriteType(WritePType.THROUGH))); + mFileSystemMaster.completeFile(uri, CompleteFileContext.mergeFrom( + CompleteFilePOptions.newBuilder().setContentHash(alluxioContentHash))); + + FileInfo info = mFileSystemMaster.getFileInfo(uri, GetStatusContext.defaults()); + assertEquals(alluxioContentHash, Fingerprint.parse(info.getUfsFingerprint()) + .getTag(Fingerprint.Tag.CONTENT_HASH)); + + // After syncing we should have the new version of the file with the new fingerprint + info = mFileSystemMaster.getFileInfo(uri, + GetStatusContext.mergeFrom(GetStatusPOptions.newBuilder().setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(0).build()))); + assertEquals(ufsContentHash, Fingerprint.parse(info.getUfsFingerprint()) + .getTag(Fingerprint.Tag.CONTENT_HASH)); + } + @Test public void setAttributeOwnerGroupOnMetadataUpdate() throws Exception { AlluxioURI ufsMount = setupMockUfsS3Mount(); @@ -267,6 +325,73 @@ public void deleteAlluxioOnlyNoSync() throws Exception { assertFalse(delegateMaster.mSynced.get()); } + /** + * Tests that the getStatus operation does not trigger a metadata sync that loads its children.
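+ * The UFS is a Mockito mock, so the verify() call counts capture exactly which UFS calls happen.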
+ */ + @Test + public void getStatusOnDirectory() throws Exception { + AlluxioURI ufsMount = setupMockUfsS3Mount(); + short mode = ModeUtils.getUMask("0700").toShort(); + + // Mock dir1 ufs path + AlluxioURI dir1Path = ufsMount.join("dir1"); + UfsDirectoryStatus dir1Status = new UfsDirectoryStatus(dir1Path.getPath(), "", "", mode); + Mockito.when(mUfs.getParsedFingerprint(dir1Path.toString())) + .thenReturn(Fingerprint.create("s3", dir1Status)); + Mockito.when(mUfs.exists(dir1Path.toString())).thenReturn(true); + Mockito.when(mUfs.isDirectory(dir1Path.toString())).thenReturn(true); + Mockito.when(mUfs.isFile(dir1Path.toString())).thenReturn(false); + Mockito.when(mUfs.getStatus(dir1Path.toString())).thenReturn(dir1Status); + Mockito.when(mUfs.getDirectoryStatus(dir1Path.toString())).thenReturn(dir1Status); + + // Mock nested ufs path /dir1/dir2 + AlluxioURI nestedDirectoryPath = ufsMount.join("dir1").join("dir2"); + UfsDirectoryStatus nestedDirStatus = + new UfsDirectoryStatus(dir1Path.getPath(), "", "", mode); + + Mockito.when(mUfs.getParsedFingerprint(nestedDirectoryPath.toString())) + .thenReturn(Fingerprint.create("s3", nestedDirStatus)); + Mockito.when(mUfs.exists(nestedDirectoryPath.toString())).thenReturn(true); + Mockito.when(mUfs.isDirectory(nestedDirectoryPath.toString())).thenReturn(true); + Mockito.when(mUfs.isFile(nestedDirectoryPath.toString())).thenReturn(false); + Mockito.when(mUfs.getStatus(nestedDirectoryPath.toString())).thenReturn(nestedDirStatus); + Mockito.when(mUfs.getDirectoryStatus(nestedDirectoryPath.toString())) + .thenReturn(nestedDirStatus); + + // Mock creating the same directory and nested file in UFS out of band + AlluxioURI dir1 = new AlluxioURI("/mnt/local/dir1"); + AlluxioURI dir2 = new AlluxioURI("/mnt/local/dir1/dir2"); + Mockito.when(mUfs.listStatus(eq(dir1Path.toString()))) + .thenReturn(new UfsStatus[]{new UfsDirectoryStatus("dir2", "", "", mode)}); + Mockito.when(mUfs.listStatus(eq(nestedDirectoryPath.toString()))) + .thenReturn(new UfsStatus[]{}); + + // List the nested directory + // listStatus is called on UFS /dir1/dir2 + mFileSystemMaster.listStatus(dir2, ListStatusContext.mergeFrom( + ListStatusPOptions.newBuilder().setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(0).build()))); + Mockito.verify(mUfs, Mockito.times(0)) + .listStatus(eq(dir1Path.toString())); + Mockito.verify(mUfs, Mockito.times(1)) + .listStatus(eq(nestedDirectoryPath.toString())); + Mockito.verify(mUfs, Mockito.times(1)) + .getStatus(eq(nestedDirectoryPath.toString())); + + // Get the file info of the directory /dir1 + // Make sure this triggers no additional list or get on UFS /dir1 or /dir1/dir2 + // (the verify counts below are unchanged from the listing above) + mFileSystemMaster.getFileInfo(dir1, GetStatusContext.mergeFrom( + GetStatusPOptions.newBuilder().setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(0).build()))); + Mockito.verify(mUfs, Mockito.times(0)) + .listStatus(eq(dir1Path.toString())); + Mockito.verify(mUfs, Mockito.times(1)) + .listStatus(eq(nestedDirectoryPath.toString())); + Mockito.verify(mUfs, Mockito.times(1)) + .getStatus(eq(nestedDirectoryPath.toString())); + } + private static class SyncAwareFileSystemMaster extends DefaultFileSystemMaster { AtomicBoolean mSynced = new AtomicBoolean(false); diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java index 40278a8d5fdb..9c41577513a9 100644 ---
a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java @@ -625,7 +625,10 @@ public void ttlDirectoryDelete() throws Exception { FileInfo fileInfo = mFileSystemMaster.getFileInfo(dirId); assertEquals(fileInfo.getFileId(), dirId); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - mThrown.expect(FileDoesNotExistException.class); + // TTL is set to 0, the directory should have been freed during last TTL check. + assertEquals(0, + mFileSystemMaster.getFileInfo(NESTED_DIR_URI, GET_STATUS_CONTEXT) + .getInAlluxioPercentage()); mFileSystemMaster.getFileInfo(dirId); } @@ -646,7 +649,10 @@ public void ttlDirectoryDeleteReplay() throws Exception { FileInfo fileInfo = mFileSystemMaster.getFileInfo(dirId); assertEquals(fileInfo.getFileId(), dirId); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - mThrown.expect(FileDoesNotExistException.class); + // TTL is set to 0, the directory should have been freed during last TTL check. + assertEquals(0, + mFileSystemMaster.getFileInfo(NESTED_DIR_URI, GET_STATUS_CONTEXT) + .getInAlluxioPercentage()); mFileSystemMaster.getFileInfo(dirId); } @@ -764,8 +770,10 @@ public void setTtlForFileWithNoTtl() throws Exception { SetAttributeContext.mergeFrom(SetAttributePOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(0)))); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - // TTL is set to 0, the file should have been deleted during last TTL check. - mThrown.expect(FileDoesNotExistException.class); + // TTL is set to 0, the file should have been freed during last TTL check. + assertEquals(0, + mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT) + .getInAlluxioPercentage()); mFileSystemMaster.getFileInfo(fileId); } @@ -791,8 +799,10 @@ public void setTtlForDirectoryWithNoTtl() throws Exception { SetAttributeContext.mergeFrom(SetAttributePOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(0)))); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - // TTL is set to 0, the file and directory should have been deleted during last TTL check. - mThrown.expect(FileDoesNotExistException.class); + // TTL is set to 0, the file should have been freed during last TTL check. + assertEquals(0, + mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT) + .getInAlluxioPercentage()); mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT); mFileSystemMaster.getFileInfo(NESTED_DIR_URI, GET_STATUS_CONTEXT); mFileSystemMaster.getFileInfo(NESTED_FILE_URI, GET_STATUS_CONTEXT); @@ -817,8 +827,10 @@ public void setSmallerTtlForFileWithTtl() throws Exception { SetAttributeContext.mergeFrom(SetAttributePOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(0)))); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - // TTL is reset to 0, the file should have been deleted during last TTL check. - mThrown.expect(FileDoesNotExistException.class); + // TTL is set to 0, the file should have been freed during last TTL check. 
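+ // Free evicts the cached blocks but keeps the inode, so the lookups below still succeed.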
+ assertEquals(0, + mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT) + .getInAlluxioPercentage()); mFileSystemMaster.getFileInfo(fileId); } @@ -840,8 +852,10 @@ public void setSmallerTtlForDirectoryWithTtl() throws Exception { SetAttributeContext.mergeFrom(SetAttributePOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(0)))); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - // TTL is reset to 0, the file should have been deleted during last TTL check. + // TTL is set to 0, the file should have been freed during last TTL check. + assertEquals(0, + mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT) + .getInAlluxioPercentage()); mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT); } @@ -1765,10 +1779,10 @@ public void writeToReadOnlyFileWhileCreating() throws Exception { @Test public void RecursiveDeleteForceFlushJournals() throws Exception { - FileSystemMaster fileSystemMasterWithSpy = spy(mFileSystemMaster); + DefaultFileSystemMaster fileSystemMasterWithSpy = spy(mFileSystemMaster); AtomicInteger flushCount = new AtomicInteger(); AtomicInteger closeCount = new AtomicInteger(); - when(fileSystemMasterWithSpy.createJournalContext()).thenReturn( + when(fileSystemMasterWithSpy.createJournalContext(true)).thenReturn( new JournalContext() { private int mNumLogs = 0; diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTestBase.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTestBase.java index 0e90d2f796c1..cfa8b17daad8 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTestBase.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTestBase.java @@ -43,8 +43,12 @@ import alluxio.master.file.contexts.GetStatusContext; import alluxio.master.file.contexts.ListStatusContext; import alluxio.master.file.contexts.MountContext; +import alluxio.master.file.mdsync.DefaultSyncProcess; +import alluxio.master.file.mdsync.TestSyncProcessor; import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.MountTable; import alluxio.master.file.meta.TtlIntervalRule; +import alluxio.master.file.meta.UfsSyncPathCache; import alluxio.master.journal.JournalSystem; import alluxio.master.journal.JournalTestUtils; import alluxio.master.journal.JournalType; @@ -363,7 +367,15 @@ void startServices() throws Exception { mExecutorService = Executors .newFixedThreadPool(4, ThreadFactoryUtils.build("DefaultFileSystemMasterTest-%d", true)); mFileSystemMaster = new DefaultFileSystemMaster(mBlockMaster, masterContext, - ExecutorServiceFactories.constantExecutorServiceFactory(mExecutorService), mClock); + ExecutorServiceFactories.constantExecutorServiceFactory(mExecutorService), mClock) { + @Override + protected DefaultSyncProcess createSyncProcess( + ReadOnlyInodeStore inodeStore, MountTable mountTable, InodeTree inodeTree, + UfsSyncPathCache syncPathCache) { + return new TestSyncProcessor( + this, inodeStore, mountTable, inodeTree, syncPathCache, getAbsentPathCache()); + } + }; mInodeStore = mFileSystemMaster.getInodeStore(); mInodeTree = mFileSystemMaster.getInodeTree(); mRegistry.add(FileSystemMaster.class, mFileSystemMaster); @@ -382,7 +394,7 @@ void startServices() throws Exception { Constants.MEDIUM_SSD, (long) Constants.KB), ImmutableMap.of(), new HashMap<String, StorageList>(), RegisterWorkerPOptions.getDefaultInstance()); mWorkerId2 = mBlockMaster.getWorkerId( - new
WorkerNetAddress().setHost("remote").setRpcPort(80).setDataPort(81).setWebPort(82)); + new WorkerNetAddress().setHost("localhost").setRpcPort(83).setDataPort(84).setWebPort(85)); mBlockMaster.workerRegister(mWorkerId2, Arrays.asList(Constants.MEDIUM_MEM, Constants.MEDIUM_SSD), ImmutableMap.of(Constants.MEDIUM_MEM, (long) Constants.MB, diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2BenchmarkTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2BenchmarkTest.java new file mode 100644 index 000000000000..50de76fe9699 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2BenchmarkTest.java @@ -0,0 +1,131 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file; + +import alluxio.AlluxioURI; +import alluxio.exception.AccessControlException; +import alluxio.exception.FileAlreadyExistsException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.FileSystemMasterCommonPOptions; +import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.LoadMetadataPType; +import alluxio.master.file.contexts.ListStatusContext; +import alluxio.master.file.contexts.MountContext; +import alluxio.master.file.mdsync.BaseTask; +import alluxio.util.CommonUtils; + +import org.apache.commons.io.FileUtils; +import org.junit.Ignore; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; + +/** + * This class is to test the load metadata performance against a local UFS. + * use {@link FileSystemMetadataSyncV2BenchmarkTest#generateTestFiles()} to generate test files + * first, then run the v1 or v2 sync respectively. + * This class is for debugging and should not be run as a unit test. 
+ */ +@Ignore +public final class FileSystemMetadataSyncV2BenchmarkTest extends FileSystemMasterTestBase { + private static final Logger LOG = + LoggerFactory.getLogger(FileSystemMetadataSyncV2BenchmarkTest.class); + private static final String LOCAL_FS_ABSOLUTE_PATH = "/tmp/s3-test-files/bucket"; + private static final String SUB_DIR = "/0/0/0/0"; + private static final AlluxioURI UFS_ROOT = new AlluxioURI( + "file://" + LOCAL_FS_ABSOLUTE_PATH + SUB_DIR); + private static final AlluxioURI MOUNT_POINT = new AlluxioURI("/local_mount"); + + @Override + public void before() throws Exception { + super.before(); + } + + @Test + public void syncV2() + throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + + // Sync one file from UFS + // First pass + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, DirectoryLoadType.BFS, 0 + ).getBaseTask(); + result.waitComplete(0); + System.out.println(result.getTaskInfo().getStats()); + + System.out.println("--------Second pass----------"); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, DirectoryLoadType.BFS, 0 + ).getBaseTask(); + result.waitComplete(0); + System.out.println(result.getTaskInfo().getStats()); + } + + @Test + public void syncV1() + throws FileDoesNotExistException, FileAlreadyExistsException, AccessControlException, + IOException, InvalidPathException { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + + // Sync one file from UFS + long start = CommonUtils.getCurrentMs(); + mFileSystemMaster.listStatus(MOUNT_POINT, listSync(true)); + System.out.println("Time elapsed " + (CommonUtils.getCurrentMs() - start) + "ms"); + } + + @Ignore + @Test + public void generateTestFiles() throws IOException { + int count = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 2; ++k) { + for (int l = 0; l < 2; ++l) { + for (int n = 0; n < 10; ++n) { + for (int m = 0; m < 10000; ++m) { + count++; + if (count % 10000 == 0) { + System.out.println(count); + } + String fileData = "f"; + FileOutputStream fos = + FileUtils.openOutputStream(new File( + String.format( + "%s/%d/%d/%d/%d/%d/f%d", LOCAL_FS_ABSOLUTE_PATH, i, j, k, l, n, m))); + fos.write(fileData.getBytes()); + fos.flush(); + fos.close(); + } + } + } + } + } + } + } + + private ListStatusContext listSync(boolean isRecursive) { + return ListStatusContext.mergeFrom(ListStatusPOptions.newBuilder() + .setRecursive(isRecursive) + .setLoadMetadataType(LoadMetadataPType.ALWAYS) + .setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(0).build() + )); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2Test.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2Test.java new file mode 100644 index 000000000000..76a1d3499d51 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2Test.java @@ -0,0 +1,1375 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.client.WriteType; +import alluxio.concurrent.jsr.CompletableFuture; +import alluxio.exception.InvalidPathException; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.CreateFilePOptions; +import alluxio.grpc.DeletePOptions; +import alluxio.grpc.WritePType; +import alluxio.master.file.contexts.CompleteFileContext; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.CreateFileContext; +import alluxio.master.file.contexts.DeleteContext; +import alluxio.master.file.contexts.ExistsContext; +import alluxio.master.file.contexts.MountContext; +import alluxio.master.file.mdsync.BaseTask; +import alluxio.master.file.mdsync.DefaultSyncProcess; +import alluxio.master.file.mdsync.SyncFailReason; +import alluxio.master.file.mdsync.SyncOperation; +import alluxio.master.file.mdsync.TaskStats; +import alluxio.master.file.mdsync.TestSyncProcessor; +import alluxio.master.file.meta.MountTable; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsLoadResult; +import alluxio.underfs.UfsStatus; +import alluxio.underfs.UnderFileSystem; +import alluxio.util.CommonUtils; +import alluxio.wire.FileInfo; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.SynchronousQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; + +/** + * Unit tests for {@link FileSystemMaster}. 
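+ * Each case runs once per {@link DirectoryLoadType} via the parameterized runner.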
+ */ +@RunWith(Parameterized.class) +public class FileSystemMetadataSyncV2Test extends MetadataSyncV2TestBase { + + @Parameterized.Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { + {DirectoryLoadType.SINGLE_LISTING}, + {DirectoryLoadType.BFS}, + {DirectoryLoadType.DFS}, + }); + } + + public FileSystemMetadataSyncV2Test(DirectoryLoadType directoryLoadType) { + mDirectoryLoadType = directoryLoadType; + } + + @Test + public void asyncListingOperations() throws Exception { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, + TEST_DIRECTORY + "/" + TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + + // with depth NONE, only the path itself is included + assertEquals(ImmutableList.of(TEST_DIRECTORY + "/"), + listAsync(MOUNT_POINT.join(TEST_DIRECTORY), DescendantType.NONE) + .getItems().map(UfsStatus::getName).collect(Collectors.toList())); + // depth one will have the file and nested directory + assertEquals(ImmutableList.of(TEST_DIRECTORY + "/" + TEST_DIRECTORY + "/", + TEST_DIRECTORY + "/" + TEST_FILE), + listAsync(MOUNT_POINT.join(TEST_DIRECTORY), DescendantType.ONE) + .getItems().map(UfsStatus::getName).collect(Collectors.toList())); + // depth all will only have the files + assertEquals(ImmutableList.of(TEST_DIRECTORY + "/" + TEST_DIRECTORY + "/" + TEST_FILE, + TEST_DIRECTORY + "/" + TEST_FILE), + listAsync(MOUNT_POINT.join(TEST_DIRECTORY), DescendantType.ALL) + .getItems().map(UfsStatus::getName).collect(Collectors.toList())); + } + + UfsLoadResult listAsync(AlluxioURI alluxioPath, DescendantType descendantType) throws Exception { + MountTable.Resolution resolution = mFileSystemMaster.getMountTable().resolve(alluxioPath); + try (CloseableResource<UnderFileSystem> ufsClient = + Objects.requireNonNull(mFileSystemMaster.getMountTable() + .getUfsClient(resolution.getMountId())).acquireUfsResource()) { + UfsClient cli = ufsClient.get(); + SynchronousQueue<Object> result = new SynchronousQueue<>(); + cli.performListingAsync(resolution.getUri().getPath(), null, null, descendantType, true, + ufsResult -> { + try { + result.put(ufsResult); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }, t -> { + try { + result.put(t); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); + return (UfsLoadResult) result.take(); + } + } + + @Test + public void syncDirDepth() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + + // Sync the dir + AlluxioURI syncPath = MOUNT_POINT.join(TEST_DIRECTORY); + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + + // Sync with depth 1, should see the file + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, DescendantType.ONE, mDirectoryLoadType,
0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + } + + @Test + public void syncNonPersistedNested() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 2L + )); + + // make a non persisted file in the nested path + AlluxioURI nestedPath = MOUNT_POINT.join(TEST_DIRECTORY); + for (int i = 0; i < 3; i++) { + nestedPath = nestedPath.join(TEST_DIRECTORY); + mFileSystemMaster.createDirectory(nestedPath, CreateDirectoryContext.defaults()); + } + mFileSystemMaster.createFile(nestedPath.join("file1"), + CreateFileContext.defaults().setWriteType(WriteType.MUST_CACHE)); + mFileSystemMaster.completeFile(nestedPath.join("file1"), + CompleteFileContext.defaults()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, + mDirectoryLoadType == DirectoryLoadType.SINGLE_LISTING ? 
1L : 2L, + SyncOperation.SKIPPED_NON_PERSISTED, 4L // the nested file and its parents + )); + assertTrue(mFileSystemMaster.exists(nestedPath.join("file1"), ExistsContext.defaults())); + + // delete the object and sync again + mS3Client.deleteObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 1L, + SyncOperation.SKIPPED_NON_PERSISTED, 5L // the nested file and its parents + )); + assertTrue(mFileSystemMaster.exists(nestedPath.join("file1"), ExistsContext.defaults())); + } + + @Test + public void syncNonPersistedExists() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + // Sync the file + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + + AlluxioURI filePath = MOUNT_POINT.join(TEST_FILE); + // recreate the file, but put it in alluxio only + mFileSystemMaster.delete(filePath, DeleteContext.mergeFrom( + DeletePOptions.newBuilder().setAlluxioOnly(true))); + mFileSystemMaster.createFile(filePath, + CreateFileContext.defaults().setWriteType(WriteType.MUST_CACHE)); + mFileSystemMaster.completeFile(filePath, CompleteFileContext.defaults()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.SKIPPED_NON_PERSISTED, 1L + )); + } + + @Test + public void syncNonPersisted() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 11L + )); + + // make a non-complete file in the mount path + mFileSystemMaster.createFile(MOUNT_POINT.join("file1"), + CreateFileContext.defaults()); + // make a non persisted file in the nested path + mFileSystemMaster.createFile(MOUNT_POINT.join(TEST_DIRECTORY).join("file1"), + CreateFileContext.defaults().setWriteType(WriteType.MUST_CACHE)); + mFileSystemMaster.completeFile(MOUNT_POINT.join(TEST_DIRECTORY).join("file1"), + CompleteFileContext.defaults()); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.SKIPPED_NON_PERSISTED, 1L, + SyncOperation.NOOP, 1L + )); + + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join("file1"), ExistsContext.defaults())); + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join(TEST_DIRECTORY) + 
.join("file1"), ExistsContext.defaults())); + + // delete all objects on the UFS + for (int i = 0; i < 10; i++) { + mS3Client.deleteObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE + i); + } + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 10L, + SyncOperation.SKIPPED_NON_PERSISTED, 3L // includes the skipped directory + )); + + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join("file1"), ExistsContext.defaults())); + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join(TEST_DIRECTORY) + .join("file1"), ExistsContext.defaults())); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.SKIPPED_NON_PERSISTED, 3L // includes the skipped directory + )); + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join("file1"), ExistsContext.defaults())); + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join(TEST_DIRECTORY) + .join("file1"), ExistsContext.defaults())); + } + + @Test + public void basicSyncMultiRequest() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_FILE + i, TEST_CONTENT); + } + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 11L + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, + "", mFileSystemMaster, mClient); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 11L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, + "", mFileSystemMaster, mClient); + } + + @Test + public void dirTest() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + + // load the dir with depth 1 + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + List items = mFileSystemMaster.listStatus(MOUNT_POINT, listNoSync(true)); + assertEquals(1, items.size()); + } + + @Test + public void basicSync() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + 
result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, + "", mFileSystemMaster, mClient); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, + "", mFileSystemMaster, mClient); + } + + @Test + public void testUpdateDirectChildrenLoaded() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "d1/foo", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d2/foo", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d3/d4/foo", TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("d3"), DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + + assertFalse(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + + assertTrue(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT.join("d3"), getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + + assertTrue(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT.join("d3/d4"), getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT.join("d1"), getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT.join("d2"), getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT.join("d1"), getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT.join("d2"), getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + } + + @Test + public void basicSyncNestedMount() throws Throwable { + mS3Client.putObject(TEST_BUCKET, + TEST_DIRECTORY + "/", ""); + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT.join(TEST_DIRECTORY), MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + 
TEST_FILE, TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, TEST_DIRECTORY, mFileSystemMaster, mClient); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, TEST_DIRECTORY, mFileSystemMaster, mClient); + } + + @Test + public void basicSyncNestedMountNestedDir() throws Throwable { + mS3Client.putObject(TEST_BUCKET, + TEST_DIRECTORY + "/", ""); + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT.join(TEST_DIRECTORY), MountContext.defaults()); + // create files + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + // create nested files + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + + TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 21L + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, TEST_DIRECTORY, mFileSystemMaster, mClient); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, + mDirectoryLoadType == DirectoryLoadType.SINGLE_LISTING ? 
20L : 21L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, TEST_DIRECTORY, mFileSystemMaster, mClient); + } + + @Test + public void basicSyncNestedMountNestedDirWithMarkers() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + // create directory markers + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/", ""); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_DIRECTORY + "/", ""); + // create files + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + // create nested files + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + + TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 22L + )); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 22L + )); + + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void basicSyncEmptyDirWithMarkers() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + // create directory marker + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/", ""); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L, + SyncOperation.NOOP, 0L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void basicSyncNestedFile() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + 
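+ // Note on the NOOP counts asserted below: with SINGLE_LISTING the subtree is returned as
+ // one flat listing, so the second sync re-examines only the 10 files (10 NOOPs); BFS/DFS
+ // issue a separate load per directory and also count a NOOP for the nested directory (11).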
assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 11L + )); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, + mDirectoryLoadType != DirectoryLoadType.SINGLE_LISTING ? 11L : 10L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void basicSyncDirectory() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + + AlluxioURI syncPath = MOUNT_POINT.join(TEST_DIRECTORY); + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 11L + )); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 10L + )); + } + + @Test + public void syncInodeHappyPath() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + + // Sync one file from UFS + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join(TEST_FILE), DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + FileInfo info = mFileSystemMaster.getFileInfo(MOUNT_POINT.join(TEST_FILE), getNoSync()); + assertFalse(info.isFolder()); + assertTrue(info.isCompleted()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join(TEST_FILE), DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + // Delete the file from UFS, then sync again + 
mS3Client.deleteObject(TEST_BUCKET, TEST_FILE); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join(TEST_FILE), DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 1L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + } + + @Test + public void syncInodeDescendantTypeNoneHappyPath() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + + // Sync one file from UFS + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join(TEST_FILE), DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + FileInfo info = mFileSystemMaster.getFileInfo(MOUNT_POINT.join(TEST_FILE), getNoSync()); + assertFalse(info.isFolder()); + assertTrue(info.isCompleted()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void deleteOneAndAddAnother() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "foo/a", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "foo/c", TEST_CONTENT); + + // Sync two files from UFS + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("foo"), DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 3L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + // Delete one and create another + mS3Client.deleteObject(TEST_BUCKET, "foo/a"); + mS3Client.putObject(TEST_BUCKET, "foo/b", TEST_CONTENT); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("foo"), DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L, + SyncOperation.DELETE, 1L, + SyncOperation.NOOP, 1L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void deleteDirectory() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "d1/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d1/f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d2/f1", TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 5L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, 
mClient); + + mS3Client.deleteObject(TEST_BUCKET, "d1/f1"); + mS3Client.deleteObject(TEST_BUCKET, "d1/f2"); + mS3Client.putObject(TEST_BUCKET, "d0/f1", TEST_CONTENT); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + + // "d2/f1" + long noopCount = 1; + if (mDirectoryLoadType != DirectoryLoadType.SINGLE_LISTING) { + // "d2" + noopCount++; + } + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 2L, + SyncOperation.DELETE, 3L, + SyncOperation.NOOP, noopCount + )); + + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void syncInodeHappyPathNestedObjects() throws Throwable { + mS3Client.putObject(TEST_BUCKET, "d1/1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d1/2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d1/3", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d2/1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d2/2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d2/3", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d3/1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d3/2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d3/3", TEST_CONTENT); + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + + // count the files + long numInodes = 9; + // count the directories + numInodes += 3; + + // Sync one file from UFS + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, numInodes + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + // count the files + long noopCount = 9; + if (mDirectoryLoadType != DirectoryLoadType.SINGLE_LISTING) { + // count the directories + noopCount += 3; + } + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, noopCount + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void syncNestedObjectsCreateThenDelete() throws Throwable { + mS3Client.putObject(TEST_BUCKET, "d/1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d/2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d/3", TEST_CONTENT); + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + + // count the files + long numInodes = 3; + // count the directories + numInodes += 1; + + // Sync one file from UFS + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, numInodes + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + 
.get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + mS3Client.deleteObject(TEST_BUCKET, "d/1"); + mS3Client.deleteObject(TEST_BUCKET, "d/2"); + mS3Client.deleteObject(TEST_BUCKET, "d/3"); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 4L + )); + } + + @Test + public void syncInodeUfsDown() + throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + stopS3Server(); + final BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + assertThrows(IOException.class, () -> { + result.waitComplete(TIMEOUT_MS); + }); + assertSyncFailureReason(result.getTaskInfo(), SyncFailReason.LOADING_UFS_IO_FAILURE); + + assertFalse(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + + startS3Server(); + } + + @Test + public void syncInodeProcessingErrorHandling() + throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + TestSyncProcessor syncer = (TestSyncProcessor) mFileSystemMaster.getMetadataSyncer(); + syncer.beforePerformSyncOne((ignored) -> { + throw new Exception("fail"); + }); + final BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + assertThrows(Exception.class, () -> { + result.waitComplete(TIMEOUT_MS); + }); + assertSyncFailureReason(result.getTaskInfo(), SyncFailReason.PROCESSING_UNKNOWN); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + + syncer.beforePerformSyncOne((context) -> { + Exception e = new Exception("fail"); + context.reportSyncFailReason(SyncFailReason.PROCESSING_CONCURRENT_UPDATE_DURING_SYNC, e); + throw e; + }); + final BaseTask result2 = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + assertThrows(Exception.class, () -> { + result2.waitComplete(TIMEOUT_MS); + }); + assertSyncFailureReason(result2.getTaskInfo(), + SyncFailReason.PROCESSING_CONCURRENT_UPDATE_DURING_SYNC); + } + + @Test + public void syncDirectoryHappyPath() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "file1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "file2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "file3", TEST_CONTENT); + + // To recreate -> content hashes are different + mFileSystemMaster.createFile(MOUNT_POINT.join("file1"), CreateFileContext.mergeFrom( + CreateFilePOptions.newBuilder().setWriteType(WritePType.THROUGH))); + mFileSystemMaster.completeFile(MOUNT_POINT.join("file1"), CompleteFileContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "file1", TEST_CONTENT + "diff"); + + // To delete -> doesn't exist in UFS + mFileSystemMaster.createDirectory(MOUNT_POINT.join("directory1"), + CreateDirectoryContext.defaults()); + + BaseTask 
result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + // file2 & file3 + SyncOperation.CREATE, 2L, + // directory1 + SyncOperation.DELETE, 1L, + // file1 + SyncOperation.RECREATE, 1L + )); + } + + @Test + public void syncDirectoryTestUFSIteration() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + for (int i = 0; i < 100; ++i) { + mS3Client.putObject(TEST_BUCKET, "file" + i, ""); + } + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 100L + )); + } + + @Test + public void syncDirectoryTestUFSIterationRecursive() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + int filePerDirectory = 5; + // count the files + int createdInodeCount = filePerDirectory * filePerDirectory * filePerDirectory; + // count the directories + createdInodeCount += filePerDirectory * filePerDirectory + filePerDirectory; + + for (int i = 0; i < filePerDirectory; ++i) { + for (int j = 0; j < filePerDirectory; ++j) { + for (int k = 0; k < filePerDirectory; ++k) { + mS3Client.putObject(TEST_BUCKET, String.format("%d/%d/%d", i, j, k), ""); + } + } + } + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + assertTrue(result.succeeded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, (long) createdInodeCount + )); + + // count the files + int noopInodeCount = filePerDirectory * filePerDirectory * filePerDirectory; + if (mDirectoryLoadType != DirectoryLoadType.SINGLE_LISTING) { + // count the directories + noopInodeCount += filePerDirectory * filePerDirectory + filePerDirectory; + } + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + // All the created inodes were unchanged.
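+ // With filePerDirectory = 5 that is 5 * 5 * 5 = 125 file NOOPs, plus 5 * 5 + 5 = 30
+ // directory NOOPs when each directory is listed as its own load (BFS/DFS), 155 in total;
+ // SINGLE_LISTING reports only the 125 files.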
+ assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, (long) noopInodeCount + )); + } + + @Test + public void syncNonS3DirectoryDelete() + throws Throwable { + // Create a directory not on local ufs + mFileSystemMaster.createDirectory(new AlluxioURI("/test_directory"), + CreateDirectoryContext.defaults()); + mFileSystemMaster.createDirectory(new AlluxioURI("/test_directory/sub_directory"), + CreateDirectoryContext.defaults()); + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_directory"), DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 2L + )); + + // Create a directory not on local ufs + mFileSystemMaster.createDirectory(new AlluxioURI("/test_directory"), + CreateDirectoryContext.defaults()); + mFileSystemMaster.createDirectory(new AlluxioURI("/test_directory/sub_directory"), + CreateDirectoryContext.defaults()); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_directory"), DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 2L + )); + + // Create a directory not on local ufs + mFileSystemMaster.createDirectory(new AlluxioURI("/test_directory"), + CreateDirectoryContext.defaults()); + mFileSystemMaster.createDirectory(new AlluxioURI("/test_directory/sub_directory"), + CreateDirectoryContext.defaults()); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_directory"), DescendantType.NONE, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 2L + )); + } + + @Test + public void testS3Fingerprint() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "f3", TEST_CONTENT); + + // Sync to load metadata + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 3L + )); + + mS3Client.putObject(TEST_BUCKET, "f1", ""); + mS3Client.putObject(TEST_BUCKET, "f2", TEST_CONTENT); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + // f1, f3 + SyncOperation.NOOP, 2L, + // f2 + SyncOperation.RECREATE, 1L + )); + } + + @Test + public void syncNoneOnMountPoint1() throws Throwable { + 
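+ // A minimal sketch of what this test checks, using the same syncPath API as the rest of
+ // this file: a DescendantType.NONE sync touches only the inode at the target path, so on a
+ // mount point whose children have never been loaded it is expected to record no operations:
+ // BaseTask t = mFileSystemMaster.getMetadataSyncer()
+ //     .syncPath(MOUNT_POINT, DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask();
+ // t.waitComplete(TIMEOUT_MS);
+ // assertSyncOperations(t.getTaskInfo(), ImmutableMap.of()); // empty operation map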
mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "d1/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d1/f2", TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + )); + } + + @Test + public void syncNoneOnMountPoint2() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "d1/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d1/f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d2/f1", TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + )); + } + + @Test + public void syncUfsNotFound() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("/non_existing_path"), DescendantType.ALL, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(mFileSystemMaster.getAbsentPathCache().isAbsentSince( + new AlluxioURI("/non_existing_path"), 0)); + } + + @Test + public void unmountDuringSync() throws Exception { + TestSyncProcessor syncer = (TestSyncProcessor) mFileSystemMaster.getMetadataSyncer(); + + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + for (int i = 0; i < 100; ++i) { + mS3Client.putObject(TEST_BUCKET, "file" + i, ""); + } + + BaseTask baseTask = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + + AtomicBoolean unmount = new AtomicBoolean(false); + syncer.blockUntilNthSyncThenDo(50, () -> unmount.set(true)); + CompletableFuture<Void> unmountFuture = CompletableFuture.supplyAsync(() -> { + try { + while (!unmount.get()) { + CommonUtils.sleepMs(1); + } + mFileSystemMaster.unmount(MOUNT_POINT); + return null; + } catch (Throwable e) { + throw new RuntimeException(e); + } + }); + + unmountFuture.get(); + assertThrows(DefaultSyncProcess.MountPointNotFoundRuntimeException.class, + () -> baseTask.waitComplete(TIMEOUT_MS)); + + assertFalse(baseTask.succeeded()); + assertFalse(mFileSystemMaster.exists(MOUNT_POINT, existsNoSync())); + + Map<Long, TaskStats.SyncFailure> syncFailures = + baseTask.getTaskInfo().getStats().getSyncFailReasons(); + Set<SyncFailReason> reasons = syncFailures.values().stream().map(TaskStats.SyncFailure::getSyncFailReason) + .collect(Collectors.toSet()); + assertTrue(reasons.contains(SyncFailReason.PROCESSING_MOUNT_POINT_DOES_NOT_EXIST) + || reasons.contains(SyncFailReason.LOADING_MOUNT_POINT_DOES_NOT_EXIST)); + } + + @Test + public void concurrentDelete() throws Exception { + TestSyncProcessor syncer = (TestSyncProcessor) mFileSystemMaster.getMetadataSyncer(); + + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + // Create a directory not on s3 ufs + mFileSystemMaster.createDirectory(MOUNT_POINT.join("/d"), + CreateDirectoryContext.defaults().setWriteType(WriteType.MUST_CACHE)); + // Create something else into s3 + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + AtomicReference<BaseTask> baseTask = new 
AtomicReference<>(); + CompletableFuture<Void> syncFuture = CompletableFuture.supplyAsync(() -> { + try { + baseTask.set(mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask()); + baseTask.get().waitComplete(TIMEOUT_MS); + return null; + } catch (Throwable t) { + throw new RuntimeException(t); + } + }); + + // blocks on the sync of "/d" (the 1st sync target) + syncer.blockUntilNthSyncThenDo(1, () -> { + mFileSystemMaster.delete(MOUNT_POINT.join("/d"), DeleteContext.create( + DeletePOptions.newBuilder().setAlluxioOnly(true))); + }); + syncFuture.get(); + assertTrue(baseTask.get().succeeded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + assertSyncOperations(baseTask.get().getTaskInfo(), ImmutableMap.of( + // /test_file + SyncOperation.CREATE, 1L, + // /d + SyncOperation.SKIPPED_DUE_TO_CONCURRENT_MODIFICATION, 1L + )); + } + + @Test + public void concurrentCreate() throws Exception { + TestSyncProcessor syncer = (TestSyncProcessor) mFileSystemMaster.getMetadataSyncer(); + + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + // Create the test file into s3 + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + AtomicReference<BaseTask> baseTask = new AtomicReference<>(); + CompletableFuture<Void> syncFuture = CompletableFuture.supplyAsync(() -> { + try { + baseTask.set(mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask()); + baseTask.get().waitComplete(TIMEOUT_MS); + return null; + } catch (Throwable t) { + throw new RuntimeException(t); + } + }); + + // blocks on the sync of "/test_file" (the 1st sync target) + syncer.blockUntilNthSyncThenDo(1, () -> { + mFileSystemMaster.createFile( + MOUNT_POINT.join(TEST_FILE), + CreateFileContext.defaults().setWriteType(WriteType.MUST_CACHE)); + }); + syncFuture.get(); + assertTrue(baseTask.get().succeeded()); + assertSyncOperations(baseTask.get().getTaskInfo(), ImmutableMap.of( + // /test_file + SyncOperation.SKIPPED_DUE_TO_CONCURRENT_MODIFICATION, 1L + )); + } + + @Test + public void startAfter() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "f3", TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0, "f3", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(0, mFileSystemMaster.listStatus(MOUNT_POINT, listNoSync(false)).size()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0, "f2", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(1, mFileSystemMaster.listStatus(MOUNT_POINT, listNoSync(false)).size()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0, "f1", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(2, mFileSystemMaster.listStatus(MOUNT_POINT, listNoSync(false)).size()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0, "f0", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + 
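+ // The startAfter argument is exclusive: syncing with "f3" lists no objects, "f2" picks up
+ // only f3, "f1" picks up f2 and f3, and "f0" (just above) picks up all three files, which
+ // is what the growing listStatus sizes assert after each pass.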
assertTrue(result.succeeded()); + assertEquals(3, mFileSystemMaster.listStatus(MOUNT_POINT, listNoSync(false)).size()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0, null, false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(3, mFileSystemMaster.listStatus(MOUNT_POINT, listNoSync(false)).size()); + } + + @Test + public void startAfterAbsolutePath() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "root/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/f3", TEST_CONTENT); + // The S3 mock server has a bug where 403 is returned if startAfter exceeds the last + // object key. + assertThrows(InvalidPathException.class, () -> { + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("root"), DescendantType.ONE, mDirectoryLoadType, + 0, "/random/path", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + }); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("root"), DescendantType.ONE, mDirectoryLoadType, 0, + "/s3_mount/root/f2", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(1, mFileSystemMaster.listStatus(MOUNT_POINT.join("root"), + listNoSync(false)).size()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("root"), DescendantType.ONE, mDirectoryLoadType, 0, + "/s3_mount/root", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(3, mFileSystemMaster.listStatus(MOUNT_POINT.join("root"), + listNoSync(false)).size()); + } + + @Test + public void startAfterRecursive() throws Throwable { + if (mDirectoryLoadType != DirectoryLoadType.SINGLE_LISTING) { + // NOT SUPPORTED + return; + } + + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "root/d1/d1/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/d1/d1/f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/d1/d2/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/d1/d2/f3", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/d1/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/d2/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/f1", TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("root"), DescendantType.ALL, mDirectoryLoadType, 0, "d1/d2/f2", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + /* + (under "/s3_mount/root") + /d1 + /d2 + /f3 + /f1 + /d2 + /d1 + /f1 + */ + assertEquals(7, + mFileSystemMaster.listStatus(MOUNT_POINT.join("root"), listNoSync(true)).size()); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/MetadataSyncDepthV2Test.java b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncDepthV2Test.java new file mode 100644 index 000000000000..a528cd332f48 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncDepthV2Test.java @@ -0,0 +1,204 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.master.file.contexts.MountContext; +import alluxio.master.file.mdsync.BaseTask; +import alluxio.master.file.mdsync.SyncOperation; + +import com.google.common.collect.ImmutableMap; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.Collection; + +@RunWith(Parameterized.class) +public class MetadataSyncDepthV2Test extends MetadataSyncV2TestBase { + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][]{ + {DirectoryLoadType.SINGLE_LISTING, DescendantType.ALL}, + {DirectoryLoadType.BFS, DescendantType.ALL}, + {DirectoryLoadType.DFS, DescendantType.ALL}, + {DirectoryLoadType.SINGLE_LISTING, DescendantType.ONE}, + {DirectoryLoadType.BFS, DescendantType.ONE}, + {DirectoryLoadType.DFS, DescendantType.ONE}, + {DirectoryLoadType.SINGLE_LISTING, DescendantType.NONE}, + {DirectoryLoadType.BFS, DescendantType.NONE}, + {DirectoryLoadType.DFS, DescendantType.NONE}, + }); + } + + DescendantType mDescendantType; + + public MetadataSyncDepthV2Test( + DirectoryLoadType directoryLoadType, DescendantType descendantType) { + mDescendantType = descendantType; + mDirectoryLoadType = directoryLoadType; + } + + @Test + public void syncSingleDir() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/", ""); + + // Sync the dir + AlluxioURI syncPath = MOUNT_POINT.join(TEST_DIRECTORY); + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + } + + @Test + public void syncSingleDirNested() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + String dirPath = TEST_DIRECTORY + "/" + TEST_DIRECTORY + "/"; + mS3Client.putObject(TEST_BUCKET, dirPath, ""); + + // Sync the dir + AlluxioURI syncPath = MOUNT_POINT.join(TEST_DIRECTORY).join(TEST_DIRECTORY); + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + 
assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 2L + )); + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + + // Delete the dir + mS3Client.deleteObject(TEST_BUCKET, dirPath); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 1L + )); + + // The parent should also be gone + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join(TEST_DIRECTORY), mDescendantType, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 1L + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + + // Sync the root, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of()); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void syncSingleFile() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + + // Sync the file + AlluxioURI syncPath = MOUNT_POINT.join(TEST_DIRECTORY).join(TEST_FILE); + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 2L + )); + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + + // update the metadata for the path + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT_MODIFIED); + + // Sync should see the change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.RECREATE, 1L + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + + // Delete the file + 
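+ // Once the object is gone, a sync from the root is expected to remove both the file inode
+ // and its now-empty pseudo-directory parent (two DELETEs), except with DescendantType.NONE,
+ // which never descends below the synced path and so leaves the nested file in place.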
mS3Client.deleteObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE); + // Sync the root, all should be removed + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, mDescendantType == DescendantType.NONE ? 0L : 2L + )); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + boolean exists = mFileSystemMaster.exists(syncPath, existsNoSync()); + if (mDescendantType == DescendantType.NONE) { + // since we only synced the root path, the nested file should not be deleted + assertTrue(exists); + } else { + assertFalse(exists); + } + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/MetadataSyncMultiMountV2Test.java b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncMultiMountV2Test.java new file mode 100644 index 000000000000..30d7df0fd33c --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncMultiMountV2Test.java @@ -0,0 +1,199 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.client.WriteType; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.MountContext; +import alluxio.master.file.mdsync.SyncOperation; +import alluxio.master.file.mdsync.TaskGroup; +import alluxio.wire.FileInfo; + +import com.google.common.collect.ImmutableMap; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +@RunWith(Parameterized.class) +public class MetadataSyncMultiMountV2Test extends MetadataSyncV2TestBase { + public MetadataSyncMultiMountV2Test(DirectoryLoadType directoryLoadType) { + mDirectoryLoadType = directoryLoadType; + } + + @Parameterized.Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { + {DirectoryLoadType.SINGLE_LISTING}, + {DirectoryLoadType.BFS}, + {DirectoryLoadType.DFS}, + }); + } + + @Test + public void syncNonS3DirectoryShadowingMountPoint() + throws Throwable { + /* + / (root) -> local file system (disk) + /s3_mount -> s3 bucket + create /s3_mount in the local file system so that it shadows the mount point, and then do + a metadata sync on the root + the sync of the local file system /s3_mount is expected to be skipped + */ + + String localUfsPath + = mFileSystemMaster.getMountTable().resolve(MOUNT_POINT).getUri().getPath(); + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + assertTrue(new File(localUfsPath).createNewFile()); + TaskGroup result = 
mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0); + result.waitAllComplete(TIMEOUT_MS); + assertTrue(result.allSucceeded()); + assertSyncOperations(result, ImmutableMap.of( + SyncOperation.SKIPPED_ON_MOUNT_POINT, 1L + )); + FileInfo mountPointFileInfo = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()); + assertTrue(mountPointFileInfo.isMountPoint()); + assertTrue(mountPointFileInfo.isFolder()); + } + + @Test + public void syncNestedS3Mount() + throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mFileSystemMaster.mount(NESTED_S3_MOUNT_POINT, UFS_ROOT2, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET2, "f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET2, "d/f2", TEST_CONTENT); + + /* + / (ROOT) -> unchanged (root mount point local fs) + /s3_mount -> unchanged (mount point s3://test-bucket) + /f1 -> created + /d -> pseudo directory (created) + /f1 -> (created) + /nested_s3_mount -> unchanged (mount point s3://test-bucket-2) + /f2 -> created + /d -> pseudo directory (created) + /f2 -> (created) + */ + TaskGroup result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0); + result.waitAllComplete(TIMEOUT_MS); + assertSyncOperations(result, ImmutableMap.of( + SyncOperation.CREATE, 6L + )); + assertTrue(result.allSucceeded()); + + List inodes = mFileSystemMaster.listStatus(new AlluxioURI("/"), listNoSync(true)); + assertEquals(8, inodes.size()); + assertTrue(mFileSystemMaster.exists(NESTED_S3_MOUNT_POINT.join("d/f2"), existsNoSync())); + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join("d/f1"), existsNoSync())); + } + + @Test + public void syncNestedS3MountShadowingMountPoint() + throws Throwable { + /* + / (ROOT) -> unchanged (root mount point local fs) + /s3_mount -> unchanged (mount point s3://test-bucket) + /nested_s3_mount -> unchanged (mount point s3://test-bucket-2) + /foo -> created + /nested_s3_mount -> SHADOWED (mount point s3://test-bucket) + /shadowed -> SHADOWED + /bar/baz -> SHADOWED + /not_shadowed -> created + */ + + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mFileSystemMaster.mount(NESTED_S3_MOUNT_POINT, UFS_ROOT2, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "nested_s3_mount/shadowed", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "nested_s3_mount/bar/baz", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "not_shadowed", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET2, "foo", TEST_CONTENT); + + TaskGroup result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0); + result.waitAllComplete(TIMEOUT_MS); + result.getTasks() + .forEach(it -> System.out.println(it.getTaskInfo().getStats().toReportString())); + assertSyncOperations(result, ImmutableMap.of( + SyncOperation.CREATE, 2L, + SyncOperation.SKIPPED_ON_MOUNT_POINT, mDirectoryLoadType + == DirectoryLoadType.SINGLE_LISTING ? 
2L : 1L + )); + assertTrue(result.allSucceeded()); + List inodes = mFileSystemMaster.listStatus(new AlluxioURI("/"), listNoSync(true)); + assertEquals(4, inodes.size()); + } + + @Test + public void syncS3NestedMountLocalFs() + throws Throwable { + // mount /s3_mount -> s3://test-bucket + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "foo/bar", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "foo/baz", TEST_CONTENT); + + mFileSystemMaster.createDirectory(new AlluxioURI("/mnt"), + CreateDirectoryContext.defaults().setWriteType(WriteType.THROUGH)); + // mount /mnt/nested_s3_mount -> s3://test-bucket-2 + mFileSystemMaster.mount(NESTED_MOUNT_POINT, UFS_ROOT2, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET2, "foo/bar", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET2, "foo/baz", TEST_CONTENT); + + TaskGroup result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ONE, mDirectoryLoadType, 0); + result.waitAllComplete(TIMEOUT_MS); + assertTrue(result.allSucceeded()); + assertSyncOperations(result, ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + assertEquals(1, result.getTaskCount()); + + /* + / (ROOT) -> unchanged (root mount point local fs) + /s3_mount -> unchanged (mount point s3://test-bucket) + /foo -> pseudo directory (created) + /bar -> (created) + /baz -> (created) + /mnt -> unchanged + /nested_s3_mount -> unchanged (mount point s3://test-bucket-2) + /foo -> pseudo directory (created) + /bar -> (created) + /baz -> (created) + */ + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0); + result.waitAllComplete(TIMEOUT_MS); + assertTrue(result.allSucceeded()); + assertSyncOperations(result, ImmutableMap.of( + SyncOperation.NOOP, 1L, + SyncOperation.CREATE, 6L + )); + assertEquals(3, result.getTaskCount()); + + List inodes = mFileSystemMaster.listStatus(new AlluxioURI("/"), listNoSync(true)); + assertEquals(9, inodes.size()); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/MetadataSyncNonObjectStoreV2Test.java b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncNonObjectStoreV2Test.java new file mode 100644 index 000000000000..0c8ced898029 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncNonObjectStoreV2Test.java @@ -0,0 +1,163 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file; + +import static alluxio.master.file.MetadataSyncV2TestBase.TIMEOUT_MS; +import static alluxio.master.file.MetadataSyncV2TestBase.assertSyncOperations; +import static alluxio.master.file.MetadataSyncV2TestBase.existsNoSync; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.client.WriteType; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.CreateDirectoryPOptions; +import alluxio.grpc.DeletePOptions; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.DeleteContext; +import alluxio.master.file.mdsync.BaseTask; +import alluxio.master.file.mdsync.SyncOperation; +import alluxio.security.authorization.Mode; + +import com.google.common.collect.ImmutableMap; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.util.Arrays; +import java.util.Collection; + +@RunWith(Parameterized.class) +public class MetadataSyncNonObjectStoreV2Test extends FileSystemMasterTestBase { + + DirectoryLoadType mDirectoryLoadType; + + @Parameterized.Parameters + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { + {DirectoryLoadType.SINGLE_LISTING}, + {DirectoryLoadType.BFS}, + {DirectoryLoadType.DFS}, + }); + } + + public MetadataSyncNonObjectStoreV2Test(DirectoryLoadType directoryLoadType) { + mDirectoryLoadType = directoryLoadType; + } + + @Test + public void syncEmptyDirectory() + throws Throwable { + String path = mFileSystemMaster.getMountTable().resolve(new AlluxioURI("/")).getUri().getPath(); + assertTrue(new File(path + "/test_directory").mkdir()); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + assertTrue(mFileSystemMaster.exists(new AlluxioURI("/test_directory"), existsNoSync())); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + } + + @Test + public void syncNonS3DirectorySync() + throws Throwable { + String path = mFileSystemMaster.getMountTable().resolve(new AlluxioURI("/")).getUri().getPath(); + assertTrue(new File(path + "/test_file").createNewFile()); + assertTrue(new File(path + "/test_directory").mkdir()); + assertTrue(new File(path + "/test_directory/test_file").createNewFile()); + assertTrue(new File(path + "/test_directory/nested_directory").mkdir()); + assertTrue(new File(path + "/test_directory/nested_directory/test_file").createNewFile()); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_directory"), DescendantType.NONE, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + assertTrue(mFileSystemMaster.exists(new AlluxioURI("/test_directory"), existsNoSync())); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_file"), DescendantType.NONE, mDirectoryLoadType, 
0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + assertTrue(mFileSystemMaster.exists(new AlluxioURI("/test_file"), existsNoSync())); + + // TODO(yimin) when the descendant type is ONE/ALL, it seems the NOOP of the root inode + // itself is not counted. + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_directory"), DescendantType.ONE, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 2L, + SyncOperation.NOOP, 1L + )); + assertTrue(mFileSystemMaster.exists(new AlluxioURI("/test_directory"), existsNoSync())); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_directory"), DescendantType.ALL, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L, + SyncOperation.NOOP, 3L + )); + assertTrue(mFileSystemMaster.exists(new AlluxioURI("/test_directory"), existsNoSync())); + } + + @Test + public void testNonS3Fingerprint() throws Throwable { + // this essentially creates a directory and changes the mode of the Alluxio directory + // without syncing the change down to the UFS + mFileSystemMaster.createDirectory(new AlluxioURI("/d"), + CreateDirectoryContext.defaults().setWriteType(WriteType.THROUGH)); + mFileSystemMaster.delete(new AlluxioURI("/d"), + DeleteContext.mergeFrom(DeletePOptions.newBuilder().setAlluxioOnly(true))); + mFileSystemMaster.createDirectory(new AlluxioURI("/d"), + CreateDirectoryContext.mergeFrom( + CreateDirectoryPOptions.newBuilder().setMode(new Mode((short) 0777).toProto())) + .setWriteType(WriteType.MUST_CACHE)); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + // d + SyncOperation.UPDATE, 1L + )); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/MetadataSyncV2TestBase.java b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncV2TestBase.java new file mode 100644 index 000000000000..a5053d7718e5 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncV2TestBase.java @@ -0,0 +1,308 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.collections.Pair; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.ExistsPOptions; +import alluxio.grpc.FileSystemMasterCommonPOptions; +import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.LoadMetadataPType; +import alluxio.master.file.contexts.ExistsContext; +import alluxio.master.file.contexts.GetStatusContext; +import alluxio.master.file.contexts.ListStatusContext; +import alluxio.master.file.mdsync.SyncFailReason; +import alluxio.master.file.mdsync.SyncOperation; +import alluxio.master.file.mdsync.TaskGroup; +import alluxio.master.file.mdsync.TaskInfo; +import alluxio.master.file.mdsync.TaskStats; +import alluxio.util.io.PathUtils; +import alluxio.wire.FileInfo; + +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Regions; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import org.gaul.s3proxy.S3Proxy; +import org.gaul.s3proxy.junit.S3ProxyJunitCore; +import org.gaul.s3proxy.junit.S3ProxyRule; +import org.junit.Rule; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.services.s3.paginators.ListObjectsV2Iterable; + +import java.lang.reflect.Field; +import java.util.Arrays; +import java.util.Iterator; +import java.util.Map; +import java.util.Stack; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class MetadataSyncV2TestBase extends FileSystemMasterTestBase { + static final Logger LOG = LoggerFactory.getLogger(FileSystemMetadataSyncV2Test.class); + static final String TEST_BUCKET = "alluxio-mdsync-test-bucket"; + static final String TEST_BUCKET2 = "alluxio-mdsync-test-bucket-2"; + static final String TEST_FILE = "test_file"; + static final String TEST_DIRECTORY = "test_directory"; + static final String TEST_CONTENT = "test_content"; + static final String TEST_CONTENT_MODIFIED = "test_content_modified"; + static final AlluxioURI UFS_ROOT = new AlluxioURI("s3://" + TEST_BUCKET + "/"); + static final AlluxioURI UFS_ROOT2 = new AlluxioURI("s3://" + TEST_BUCKET2 + "/"); + static final AlluxioURI MOUNT_POINT = new AlluxioURI("/s3_mount"); + static final AlluxioURI MOUNT_POINT2 = new AlluxioURI("/s3_mount2"); + static final AlluxioURI NESTED_MOUNT_POINT = new AlluxioURI("/mnt/nested_s3_mount"); + static final AlluxioURI NESTED_S3_MOUNT_POINT = + new AlluxioURI("/s3_mount/nested_s3_mount"); + static final long TIMEOUT_MS = 30_000; + + @Rule + public S3ProxyRule mS3Proxy = S3ProxyRule.builder() + .withBlobStoreProvider("transient") + .withCredentials("_", "_") + .build(); + + boolean mUseRealS3 = false; + AmazonS3 mS3Client; + S3Client mClient; + 
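// note (editorial, grounded in the code below): the AWS SDK v1 client (mS3Client) is used to create buckets and put objects, while the v2 client (mClient) is used for the paginated listings in the verification helpers +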
DirectoryLoadType mDirectoryLoadType; + + @Override + public void before() throws Exception { + Configuration.set(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_ENABLED, false); + Configuration.set(PropertyKey.UNDERFS_LISTING_LENGTH, 2); + + if (mUseRealS3) { + Configuration.set(PropertyKey.UNDERFS_S3_REGION, "us-west-1"); + mClient = S3Client.builder().region(Region.US_WEST_1).build(); + mS3Client = AmazonS3ClientBuilder.standard() + .withRegion(Region.US_WEST_1.toString()).build(); + } else { + Configuration.set(PropertyKey.UNDERFS_S3_ENDPOINT, + mS3Proxy.getUri().getHost() + ":" + mS3Proxy.getUri().getPort()); + Configuration.set(PropertyKey.UNDERFS_S3_ENDPOINT_REGION, "us-west-2"); + Configuration.set(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS, true); + Configuration.set(PropertyKey.S3A_ACCESS_KEY, mS3Proxy.getAccessKey()); + Configuration.set(PropertyKey.S3A_SECRET_KEY, mS3Proxy.getSecretKey()); + mClient = S3Client.builder().credentialsProvider(StaticCredentialsProvider.create( + AwsBasicCredentials.create(mS3Proxy.getAccessKey(), mS3Proxy.getSecretKey()))) + .endpointOverride(mS3Proxy.getUri()).region(Region.US_WEST_2) + .build(); + + mS3Client = AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withCredentials( + new AWSStaticCredentialsProvider( + new BasicAWSCredentials(mS3Proxy.getAccessKey(), mS3Proxy.getSecretKey()))) + .withEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration(mS3Proxy.getUri().toString(), + Regions.US_WEST_2.getName())) + .build(); + } + mS3Client.createBucket(TEST_BUCKET); + mS3Client.createBucket(TEST_BUCKET2); + super.before(); + } + + @Override + public void after() throws Exception { + mS3Client.shutdown(); + mClient.close(); + try { + stopS3Server(); + } catch (Exception e) { + LOG.error("Closing s3 mock server failed", e); + } + super.after(); + } + + ListStatusContext listSync(boolean isRecursive) { + return ListStatusContext.mergeFrom(ListStatusPOptions.newBuilder() + .setRecursive(isRecursive) + .setLoadMetadataType(LoadMetadataPType.ALWAYS) + .setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(0).build() + )); + } + + ListStatusContext listNoSync(boolean isRecursive) { + return ListStatusContext.mergeFrom(ListStatusPOptions.newBuilder() + .setRecursive(isRecursive) + .setLoadMetadataType(LoadMetadataPType.NEVER) + .setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(-1).build() + )); + } + + GetStatusContext getNoSync() { + return GetStatusContext.mergeFrom(GetStatusPOptions.newBuilder() + .setLoadMetadataType(LoadMetadataPType.NEVER) + .setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(-1).build() + )); + } + + static ExistsContext existsNoSync() { + return ExistsContext.mergeFrom(ExistsPOptions.newBuilder() + .setLoadMetadataType(LoadMetadataPType.NEVER) + .setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(-1).build() + )); + } + + void stopS3Server() { + try { + Field coreField = S3ProxyRule.class.getDeclaredField("core"); + coreField.setAccessible(true); + S3ProxyJunitCore core = (S3ProxyJunitCore) coreField.get(mS3Proxy); + Field s3ProxyField = S3ProxyJunitCore.class.getDeclaredField("s3Proxy"); + s3ProxyField.setAccessible(true); + S3Proxy proxy = (S3Proxy) s3ProxyField.get(core); + proxy.stop(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + void startS3Server() { + try { + Field coreField = S3ProxyRule.class.getDeclaredField("core"); + 
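// as in stopS3Server above, the running S3Proxy instance is only reachable through S3ProxyRule's private "core" and "s3Proxy" fields, so reflection is used to restart the mock server mid-test +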
coreField.setAccessible(true); + S3ProxyJunitCore core = (S3ProxyJunitCore) coreField.get(mS3Proxy); + Field s3ProxyField = S3ProxyJunitCore.class.getDeclaredField("s3Proxy"); + s3ProxyField.setAccessible(true); + S3Proxy proxy = (S3Proxy) s3ProxyField.get(core); + proxy.start(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public static void checkUfsMatches( + AlluxioURI alluxioPath, String s3Bucket, + String mountPrefix, + DefaultFileSystemMaster master, S3Client s3client) throws Exception { + + Stack<Pair<String, String>> toCheck = new Stack<>(); + toCheck.push(new Pair<>(alluxioPath.getPath(), mountPrefix)); + while (!toCheck.isEmpty()) { + Pair<String, String> nxt = toCheck.pop(); + + Iterator<FileInfo> alluxioItems = master.listStatus(new AlluxioURI(nxt.getFirst()), + ListStatusContext.defaults().disableMetadataSync()).stream().iterator(); + Iterator<Pair<String, String>> ufsItems = listUfsPath(s3Bucket, nxt.getSecond(), s3client, + mountPrefix, alluxioPath.getPath()); + while (alluxioItems.hasNext()) { + FileInfo nxtAlluxio = alluxioItems.next(); + if (!ufsItems.hasNext()) { + throw new IllegalStateException( + String.format("Ufs did not find alluxio item %s", nxtAlluxio)); + } + Pair<String, String> nxtUfs = ufsItems.next(); + String nxtInode = nxtAlluxio.getPath(); + if (nxtAlluxio.isFolder()) { + toCheck.push(new Pair<>(nxtAlluxio.getPath(), nxtUfs.getSecond())); + nxtInode = PathUtils.normalizePath(nxtInode, AlluxioURI.SEPARATOR); + } + // System.out.printf("Checking %s, %s%n", nxtInode, nxtUfs.getFirst()); + assertEquals(nxtInode, nxtUfs.getFirst()); + } + if (ufsItems.hasNext()) { + throw new IllegalStateException( + String.format("alluxio did not find ufs item %s", ufsItems.next())); + } + } + } + + static Iterator<Pair<String, String>> listUfsPath( + String s3Bucket, String s3Path, S3Client client, + String mountPrefix, String alluxioPrefix) { + String normalizedPrefix = PathUtils.normalizePath(alluxioPrefix, AlluxioURI.SEPARATOR); + if (!s3Path.isEmpty()) { + s3Path = PathUtils.normalizePath(s3Path, AlluxioURI.SEPARATOR); + } + if (!mountPrefix.isEmpty()) { + mountPrefix = PathUtils.normalizePath(mountPrefix, AlluxioURI.SEPARATOR); + } + ListObjectsV2Iterable result = client.listObjectsV2Paginator(ListObjectsV2Request.builder() + .bucket(s3Bucket).delimiter(AlluxioURI.SEPARATOR).prefix(s3Path).build()); + String finalMountPrefix = mountPrefix; + String finalS3Path = s3Path; + return result.stream().flatMap(resp -> + Stream.concat(resp.commonPrefixes().stream().map(CommonPrefix::prefix), + resp.contents().stream().map(S3Object::key))) + .filter(nxt -> { + assertTrue(nxt.startsWith(finalS3Path)); + return nxt.length() > finalS3Path.length(); + }).sorted().distinct() + .map(nxt -> new Pair<>( + normalizedPrefix + nxt.substring(finalMountPrefix.length()), nxt)).iterator(); + } + + static void assertSyncOperations(TaskInfo taskInfo, Map<SyncOperation, Long> operations) { + assertSyncOperations(taskInfo.getStats().getSuccessOperationCount(), operations); + } + + static void assertSyncOperations(TaskGroup taskGroup, Map<SyncOperation, Long> operations) { + AtomicLong[] stats = new AtomicLong[SyncOperation.values().length]; + for (int i = 0; i < stats.length; ++i) { + stats[i] = new AtomicLong(); + } + taskGroup.getTasks().forEach( + it -> { + AtomicLong[] taskStats = it.getTaskInfo().getStats().getSuccessOperationCount(); + for (int i = 0; i < taskStats.length; ++i) { + stats[i].addAndGet(taskStats[i].get()); + } + } + ); + assertSyncOperations(stats, operations); + } + + private static void assertSyncOperations( + AtomicLong[] stats, Map<SyncOperation, Long> operations) { + for (SyncOperation operation : 
SyncOperation.values()) { + assertEquals( + "Operation " + operation.toString() + " count not equal. " + + "Actual operation count: " + + Arrays.toString(stats), + (long) operations.getOrDefault(operation, 0L), + stats[operation.getValue()].get() + ); + } + } + + static void assertSyncFailureReason(TaskInfo taskInfo, SyncFailReason failReason) { + Map<Long, TaskStats.SyncFailure> failReasons = taskInfo.getStats().getSyncFailReasons(); + assertEquals(1, failReasons.size()); + assertTrue(failReasons.entrySet().stream().map(it -> it.getValue().getSyncFailReason()).collect( + Collectors.toList()).contains(failReason)); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/BaseTaskTest.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/BaseTaskTest.java new file mode 100644 index 000000000000..617b200c8088 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/BaseTaskTest.java @@ -0,0 +1,128 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import static alluxio.file.options.DescendantType.ALL; +import static alluxio.file.options.DescendantType.NONE; +import static alluxio.file.options.DescendantType.ONE; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.file.options.DirectoryLoadType; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; + +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.time.Clock; +import java.util.function.Function; + +public class BaseTaskTest { + + private MetadataSyncHandler mMetadataSyncHandler; + + private final Clock mClock = Clock.systemUTC(); + + private final MockUfsClient mUfsClient = new MockUfsClient(); + + private final Function<AlluxioURI, CloseableResource<UfsClient>> mClientSupplier = + (uri) -> new CloseableResource<UfsClient>(mUfsClient) { + @Override + public void closeResource() {} + }; + + @Before + public void before() { + mMetadataSyncHandler = new MetadataSyncHandler(Mockito.mock(TaskTracker.class), null, null); + } + + @Test + public void PathIsCoveredNone() { + BaseTask path = BaseTask.create(new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + NONE, 0, DirectoryLoadType.SINGLE_LISTING, 0), mClock.millis(), mClientSupplier); + assertTrue(path.pathIsCovered(new AlluxioURI("/path"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested"), NONE)); + + assertFalse(path.pathIsCovered(new AlluxioURI("/path"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested"), ONE)); + + assertFalse(path.pathIsCovered(new AlluxioURI("/path"), ALL)); + 
assertFalse(path.pathIsCovered(new AlluxioURI("/"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested"), ALL)); + } + + @Test + public void PathIsCoveredOne() { + BaseTask path = BaseTask.create(new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ONE, 0, DirectoryLoadType.SINGLE_LISTING, 0), mClock.millis(), mClientSupplier); + assertTrue(path.pathIsCovered(new AlluxioURI("/path"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), NONE)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested/nested"), NONE)); + + assertTrue(path.pathIsCovered(new AlluxioURI("/path"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested/nested"), ONE)); + + assertFalse(path.pathIsCovered(new AlluxioURI("/path"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested/nested"), ALL)); + } + + @Test + public void PathIsCoveredAll() { + BaseTask path = BaseTask.create(new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ALL, 0, DirectoryLoadType.SINGLE_LISTING, 0), mClock.millis(), mClientSupplier); + assertTrue(path.pathIsCovered(new AlluxioURI("/path"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), NONE)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested"), NONE)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested/nested"), NONE)); + + assertTrue(path.pathIsCovered(new AlluxioURI("/path"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), ONE)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested"), ONE)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested/nested"), ONE)); + + assertTrue(path.pathIsCovered(new AlluxioURI("/path"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), ALL)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested"), ALL)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested/nested"), ALL)); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/BatchPathWaiterTest.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/BatchPathWaiterTest.java new file mode 100644 index 000000000000..8e4cd834be5e --- /dev/null +++ 
b/core/server/master/src/test/java/alluxio/master/file/mdsync/BatchPathWaiterTest.java @@ -0,0 +1,334 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import static alluxio.file.options.DescendantType.ALL; +import static alluxio.file.options.DescendantType.NONE; +import static alluxio.file.options.DescendantType.ONE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyLong; + +import alluxio.AlluxioURI; +import alluxio.exception.status.UnavailableException; +import alluxio.file.options.DirectoryLoadType; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.journal.NoopJournalContext; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.time.Clock; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.function.Function; + +public class BatchPathWaiterTest { + + ExecutorService mThreadPool; + + private final Clock mClock = Clock.systemUTC(); + private MetadataSyncHandler mMetadataSyncHandler; + + private final MockUfsClient mUfsClient = new MockUfsClient(); + + private final Function<AlluxioURI, CloseableResource<UfsClient>> mClientSupplier = + (uri) -> new CloseableResource<UfsClient>(mUfsClient) { + @Override + public void closeResource() {} + }; + + @Before + public void before() throws UnavailableException { + mThreadPool = Executors.newCachedThreadPool(); + DefaultFileSystemMaster defaultFileSystemMaster = Mockito.mock(DefaultFileSystemMaster.class); + Mockito.when(defaultFileSystemMaster.createJournalContext()) + .thenReturn(NoopJournalContext.INSTANCE); + mMetadataSyncHandler = Mockito.spy(new MetadataSyncHandler(Mockito.mock(TaskTracker.class), + defaultFileSystemMaster, null)); + } + + @After + public void after() { + mThreadPool.shutdown(); + } + + @Test + public void TestWaiter() throws Exception { + long nxtLoadID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + NONE, 0, DirectoryLoadType.SINGLE_LISTING, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future<Boolean> waiter = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path"))); + assertThrows(TimeoutException.class, () -> waiter.get(1, TimeUnit.SECONDS)); + // Complete the sync + path.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), 
null, + false, false)); + SyncProcessResult result = new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path"), + new AlluxioURI("/path")), false, true); + path.nextCompleted(result); + // Even though we completed the path being waited for, we only release the waiter for + // paths greater than the completed path + assertThrows(TimeoutException.class, () -> waiter.get(1, TimeUnit.SECONDS)); + // now on completion of the task the waiter can be released + path.getPathLoadTask().onProcessComplete(nxtLoadID, result); + assertTrue(path.isCompleted().isPresent()); + assertTrue(waiter.get(1, TimeUnit.SECONDS)); + } + + @Test + public void TestMultiWaiter() throws Exception { + long nxtLoadID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ONE, 0, DirectoryLoadType.SINGLE_LISTING, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future<Boolean> waiter1 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/1"))); + Future<Boolean> waiter2 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/2"))); + // after completing /path/1 no waiters will be released + path.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path"), + new AlluxioURI("/path/1")), true, false)); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + // after completing /path/2, the waiter for /path/1 will be released + SyncProcessResult result = new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/2")), false, false); + path.nextCompleted(result); + assertTrue(waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + // now on completion of the task all waiters can be released + path.getPathLoadTask().onProcessComplete(nxtLoadID, result); + assertTrue(path.isCompleted().isPresent()); + assertTrue(waiter2.get(1, TimeUnit.SECONDS)); + } + + @Test + public void TestWaiterOutOfOrder() throws Exception { + long nxtLoadID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ONE, 0, DirectoryLoadType.SINGLE_LISTING, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future<Boolean> waiter1 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/1"))); + Future<Boolean> waiter2 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/2"))); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + path.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path/3"), + new AlluxioURI("/path/4")), true, false)); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + path.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path/2"), + new 
AlluxioURI("/path/3")), true, false)); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + path.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path"), + new AlluxioURI("/path/1")), true, false)); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + SyncProcessResult result = new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/2")), false, false); + path.nextCompleted(result); + assertTrue(waiter2.get(1, TimeUnit.SECONDS)); + path.getPathLoadTask().onProcessComplete(nxtLoadID, result); + assertTrue(path.isCompleted().isPresent()); + } + + @Test + public void TestBaseTackSinglePath() { + long nxtLoadID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + NONE, 0, DirectoryLoadType.SINGLE_LISTING, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + assertFalse(path.isCompleted().isPresent()); + SyncProcessResult result = new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path"), + new AlluxioURI("/path")), false, false); + path.nextCompleted(result); + path.getPathLoadTask().onProcessComplete(nxtLoadID, result); + assertTrue(path.isCompleted().isPresent()); + } + + @Test + public void TestBaseTaskInOrder() { + long nxtLoadID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + ALL, 0, DirectoryLoadType.SINGLE_LISTING, 0); + BatchPathWaiter root = (BatchPathWaiter) BaseTask.create( + ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + root.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + assertFalse(root.isCompleted().isPresent()); + + // complete , should have |<,/ad>| + PathSequence completed = new PathSequence(new AlluxioURI("/"), + new AlluxioURI("/ad")); + List completedList = Lists.newArrayList( + new PathSequence(new AlluxioURI(""), new AlluxioURI("/ad"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, + false)); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,/bf>| + completed = new PathSequence(new AlluxioURI("/ad"), new AlluxioURI("/bf")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/bf"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, + false)); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,/bf/eg| + completed = new PathSequence(new AlluxioURI("/bf"), new AlluxioURI("/bf/eg")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), + new AlluxioURI("/bf/eg"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, + false)); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,/tr| + completed = new PathSequence(new AlluxioURI("/bf/eg"), new AlluxioURI("/tr")); + completedList = 
Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/tr"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, + false)); + assertEquals(completedList, root.getLastCompleted()); + + // finish with </tr,/trd> + completed = new PathSequence(new AlluxioURI("/tr"), new AlluxioURI("/trd")); + SyncProcessResult finalResult = new SyncProcessResult(ti, ti.getBasePath(), completed, + false, false); + root.nextCompleted(finalResult); + root.getPathLoadTask().onProcessComplete(nxtLoadID, finalResult); + assertTrue(root.isCompleted().isPresent()); + } + + @Test + public void TestBaseTaskOutOfOrder() { + long nxtLoadID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + ONE, 0, DirectoryLoadType.SINGLE_LISTING, 0); + BatchPathWaiter root = (BatchPathWaiter) BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + root.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + assertFalse(root.isCompleted().isPresent()); + + // complete </,/a>, should have |<,a>| + PathSequence completed = new PathSequence(new AlluxioURI("/"), new AlluxioURI("/a")); + List<PathSequence> completedList = Lists.newArrayList( + new PathSequence(new AlluxioURI(""), new AlluxioURI("/a"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete </a,/b>, should have |<,b>| + completed = new PathSequence(new AlluxioURI("/a"), new AlluxioURI("/b")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/b"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete </c,/d>, should have |<, /b>, </c,/d>| + completed = new PathSequence(new AlluxioURI("/c"), new AlluxioURI("/d")); + completedList.add(completed); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete </b,/c>, should have |<,/d>| + completed = new PathSequence(new AlluxioURI("/b"), new AlluxioURI("/c")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/d"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete </g,/h>, should have |<,/d>, </g,/h>| + completed = new PathSequence(new AlluxioURI("/g"), new AlluxioURI("/h")); + completedList.add(completed); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete </d,/e>, should have |<,/e>, </g,/h>| + completed = new PathSequence(new AlluxioURI("/d"), new AlluxioURI("/e")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/e")), + new PathSequence(new AlluxioURI("/g"), new AlluxioURI("/h"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete </f,/g>, should have |<,/e>, </f,/h>| + completed = new PathSequence(new AlluxioURI("/f"), new AlluxioURI("/g")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/e")), + new PathSequence(new AlluxioURI("/f"), new 
AlluxioURI("/h"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,/h>| + completed = new PathSequence(new AlluxioURI("/e"), new AlluxioURI("/f")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/h"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // finish with + completed = new PathSequence(new AlluxioURI("/h"), new AlluxioURI("/j")); + SyncProcessResult finalResult = new SyncProcessResult(ti, ti.getBasePath(), completed, + false, false); + root.nextCompleted(finalResult); + root.getPathLoadTask().onProcessComplete(nxtLoadID, finalResult); + assertTrue(root.isCompleted().isPresent()); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/DirectoryPathWaiterTest.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/DirectoryPathWaiterTest.java new file mode 100644 index 000000000000..30b6892e8402 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/DirectoryPathWaiterTest.java @@ -0,0 +1,196 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.mdsync; + +import static alluxio.file.options.DescendantType.ALL; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyLong; + +import alluxio.AlluxioURI; +import alluxio.exception.status.UnavailableException; +import alluxio.file.options.DirectoryLoadType; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.journal.NoopJournalContext; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.Mockito; + +import java.time.Clock; +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.function.Function; + +@RunWith(Parameterized.class) +public class DirectoryPathWaiterTest { + + @Parameterized.Parameters + public static Collection<DirectoryLoadType> directoryLoadTypes() { + return Arrays.asList(DirectoryLoadType.DFS, DirectoryLoadType.BFS); + } + + public DirectoryPathWaiterTest(DirectoryLoadType loadType) { + mDirLoadType = loadType; + } + + private final MockUfsClient mUfsClient = new MockUfsClient(); + + private final Function<AlluxioURI, CloseableResource<UfsClient>> mClientSupplier = + (uri) -> new CloseableResource<UfsClient>(mUfsClient) { + @Override + public void closeResource() {} + }; + + DirectoryLoadType mDirLoadType; + ExecutorService mThreadPool; + Clock mClock = Clock.systemUTC(); + MetadataSyncHandler mMetadataSyncHandler; + + @Before + public void before() throws UnavailableException { + mThreadPool = Executors.newCachedThreadPool(); + DefaultFileSystemMaster defaultFileSystemMaster = Mockito.mock(DefaultFileSystemMaster.class); + Mockito.when(defaultFileSystemMaster.createJournalContext()) + .thenReturn(NoopJournalContext.INSTANCE); + mMetadataSyncHandler = Mockito.spy(new MetadataSyncHandler(Mockito.mock(TaskTracker.class), + defaultFileSystemMaster, null)); + } + + @After + public void after() { + mThreadPool.shutdown(); + } + + @Test + public void TestWaiter() throws Exception { + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ALL, 0, mDirLoadType, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future<Boolean> waiter = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path"))); + assertThrows(TimeoutException.class, () -> waiter.get(1, TimeUnit.SECONDS)); + path.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path"), + new AlluxioURI("/path")), false, true)); + assertTrue(waiter.get(1, TimeUnit.SECONDS)); + } + + @Test + public void TestMultiWaiter() throws Exception { + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ALL, 0, mDirLoadType, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + 
}).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future<Boolean> waiter1 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/1"))); + Future<Boolean> waiter2 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/2"))); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + path.nextCompleted(new SyncProcessResult(ti, new AlluxioURI("/path/1"), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/1")), false, false)); + assertTrue(waiter1.get(1, TimeUnit.SECONDS)); + // if the path is truncated, it should not release the waiter on the path + path.nextCompleted(new SyncProcessResult(ti, new AlluxioURI("/path/2"), + new PathSequence(new AlluxioURI("/path/2"), + new AlluxioURI("/path/2")), true, false)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + path.nextCompleted(new SyncProcessResult(ti, new AlluxioURI("/path/2"), + new PathSequence(new AlluxioURI("/path/2"), + new AlluxioURI("/path/2")), false, false)); + assertTrue(waiter2.get(1, TimeUnit.SECONDS)); + } + + @Test + public void TestNestedWaiter() throws Exception { + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ALL, 0, mDirLoadType, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future<Boolean> waiter1 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/1"))); + Future<Boolean> waiter2 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/2"))); + // a different nested path should not release the waiters + path.nextCompleted(new SyncProcessResult(ti, new AlluxioURI("/path/other"), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/1")), false, false)); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + // the parent path should release both the children + path.nextCompleted(new SyncProcessResult(ti, new AlluxioURI("/path"), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/1")), false, false)); + assertTrue(waiter1.get(1, TimeUnit.SECONDS)); + assertTrue(waiter2.get(1, TimeUnit.SECONDS)); + } + + @Test + public void TestParentWaiter() throws Exception { + long loadRequestID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/path"), null, + ALL, 0, mDirLoadType, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future<Boolean> waiter1 = mThreadPool.submit(() -> + path.waitForSync(new AlluxioURI("/path/nested/1"))); + Future<Boolean> waiter2 = mThreadPool.submit(() -> + path.waitForSync(new AlluxioURI("/path/nested"))); + Future<Boolean> waiter3 = mThreadPool.submit(() -> + path.waitForSync(new AlluxioURI("/path"))); + // finishing the root should only release the direct children + path.nextCompleted(new SyncProcessResult(ti, new AlluxioURI("/"), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/1")), false, false)); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + 
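// /path/nested is also not a direct child of /, so its waiter remains blocked as well +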
assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + assertTrue(waiter3.get(1, TimeUnit.SECONDS)); + // finishing /path should release the direct children of /path + SyncProcessResult finalResult = new SyncProcessResult(ti, new AlluxioURI("/path"), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/1")), false, false); + path.nextCompleted(finalResult); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertTrue(waiter2.get(1, TimeUnit.SECONDS)); + // finishing the whole task should release the remaining waiters + path.getPathLoadTask().onProcessComplete(loadRequestID, finalResult); + assertTrue(waiter1.get(1, TimeUnit.SECONDS)); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/DummySyncProcess.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/DummySyncProcess.java new file mode 100644 index 000000000000..a268dce6f015 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/DummySyncProcess.java @@ -0,0 +1,63 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.InvalidArgumentRuntimeException; +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.underfs.UfsStatus; + +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class DummySyncProcess implements SyncProcess { + + @Override + public SyncProcessResult performSync( + LoadResult loadResult, UfsSyncPathCache syncPathCache) throws Throwable { + + Stream<UfsStatus> stream = loadResult.getUfsLoadResult().getItems().peek(status -> { + // If we are loading by directory, then we must create a new load task on each + // directory traversed + if (loadResult.getTaskInfo().hasDirLoadTasks() && status.isDirectory()) { + try { + AlluxioURI fullPath = loadResult.getBaseLoadPath().join(status.getName()); + // first check if the directory needs to be synced + if (syncPathCache.shouldSyncPath( + fullPath, // no reverse resolve in test + loadResult.getTaskInfo().getSyncInterval(), + loadResult.getTaskInfo().getDescendantType()).isShouldSync()) { + loadResult.getTaskInfo().getMdSync() + .loadNestedDirectory(loadResult.getTaskInfo().getId(), fullPath); + } + } catch (InvalidPathException e) { + throw new InvalidArgumentRuntimeException(e); + } + } + }); + List<UfsStatus> items = stream.collect(Collectors.toList()); + if (items.size() == 0) { + return new SyncProcessResult(loadResult.getTaskInfo(), loadResult.getBaseLoadPath(), + null, false, false); + } + boolean rootPathIsFile = items.size() == 1 && loadResult.getBaseLoadPath().equals( + loadResult.getTaskInfo().getBasePath()) && !items.get(0).isDirectory(); + return new SyncProcessResult(loadResult.getTaskInfo(), loadResult.getBaseLoadPath(), + new PathSequence(new AlluxioURI(items.get(0).getName()), + new AlluxioURI(items.get(items.size() - 1).getName())), + loadResult.getUfsLoadResult().isTruncated(), 
rootPathIsFile + ); + } +} + diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/MockUfsClient.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/MockUfsClient.java new file mode 100644 index 000000000000..d10612b9b879 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/MockUfsClient.java @@ -0,0 +1,108 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.collections.Pair; +import alluxio.file.options.DescendantType; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsLoadResult; +import alluxio.underfs.UfsStatus; +import alluxio.util.RateLimiter; + +import java.util.Iterator; +import java.util.List; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nullable; + +public class MockUfsClient implements UfsClient { + + Throwable mError = null; + Iterator<Stream<UfsStatus>> mItems = null; + Function<String, Pair<Stream<UfsStatus>, Boolean>> mResultFunc = null; + UfsStatus mUfsStatus = null; + RateLimiter mRateLimiter = null; + Function<String, UfsStatus> mGetStatusFunc = null; + + void setError(@Nullable Throwable t) { + mError = t; + } + + void setRateLimiter(RateLimiter rateLimiter) { + mRateLimiter = rateLimiter; + } + + void setResult(Iterator<Stream<UfsStatus>> items) { + mItems = items; + } + + void setGetStatusItem(UfsStatus item) { + mUfsStatus = item; + } + + void setListingResultFunc(Function<String, Pair<Stream<UfsStatus>, Boolean>> resultFunc) { + mResultFunc = resultFunc; + } + + public void performGetStatusAsync( + String path, Consumer<UfsLoadResult> onComplete, Consumer<Throwable> onError) { + UfsStatus status = mUfsStatus; + if (mGetStatusFunc != null) { + status = mGetStatusFunc.apply(path); + } + onComplete.accept(new UfsLoadResult( + status == null ? Stream.empty() : Stream.of(status), + status == null ? 
0 : 1, + null, null, false, + status != null && status.isFile(), true)); + } + + @Override + public void performListingAsync( + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, boolean checkStatus, + Consumer<UfsLoadResult> onComplete, Consumer<Throwable> onError) { + if (mError != null) { + onError.accept(mError); + } else if (mResultFunc != null) { + try { + Pair<Stream<UfsStatus>, Boolean> result = mResultFunc.apply(path); + List<UfsStatus> items = result.getFirst().collect(Collectors.toList()); + AlluxioURI lastItem = new AlluxioURI(items.get(items.size() - 1).getName()); + onComplete.accept(new UfsLoadResult(items.stream(), items.size(), + continuationToken, lastItem, result.getSecond(), + items.size() > 0 && items.get(0).isFile(), true)); + } catch (Throwable t) { + onError.accept(t); + } + } else { + if (mItems.hasNext()) { + List<UfsStatus> items = mItems.next().collect(Collectors.toList()); + AlluxioURI lastItem = new AlluxioURI(items.get(items.size() - 1).getName()); + onComplete.accept(new UfsLoadResult(items.stream(), items.size(), + continuationToken, lastItem, mItems.hasNext(), + items.size() > 0 && items.get(0).isFile(), true)); + } + } + } + + @Override + public RateLimiter getRateLimiter() { + if (mRateLimiter == null) { + return RateLimiter.createRateLimiter(0); + } + return mRateLimiter; + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/TaskTrackerTest.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/TaskTrackerTest.java new file mode 100644 index 000000000000..644e7fcdf92c --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/TaskTrackerTest.java @@ -0,0 +1,673 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.mdsync; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; + +import alluxio.AlluxioURI; +import alluxio.collections.Pair; +import alluxio.exception.status.UnavailableException; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.file.meta.SyncCheck; +import alluxio.master.file.meta.UfsAbsentPathCache; +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.master.journal.NoopJournalContext; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsDirectoryStatus; +import alluxio.underfs.UfsFileStatus; +import alluxio.underfs.UfsStatus; +import alluxio.util.CommonUtils; +import alluxio.util.SimpleRateLimiter; +import alluxio.util.WaitForOptions; + +import com.google.common.base.Ticker; +import com.google.common.collect.ImmutableList; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.IOException; +import java.time.Duration; +import java.util.Collections; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Stream; + +public class TaskTrackerTest { + + ExecutorService mThreadPool; + TaskTracker mTaskTracker; + MetadataSyncHandler mMetadataSyncHandler; + MockUfsClient mUfsClient; + UfsSyncPathCache mUfsSyncPathCache; + UfsAbsentPathCache mAbsentCache; + SyncProcess mSyncProcess; + UfsStatus mFileStatus = new UfsFileStatus("file", "", + 0L, 0L, "", "", (short) 0, 0L); + UfsStatus mDirStatus = new UfsDirectoryStatus("dir", "", "", (short) 0); + static final long WAIT_TIMEOUT = 5_000; + + private CloseableResource<UfsClient> getClient(AlluxioURI ignored) { + return new CloseableResource<UfsClient>(mUfsClient) { + @Override + public void closeResource() { + } + }; + } + + @Before + public void before() throws UnavailableException { + mThreadPool = Executors.newCachedThreadPool(); + mUfsClient = Mockito.spy(new MockUfsClient()); + mSyncProcess = Mockito.spy(new DummySyncProcess()); + mUfsSyncPathCache = Mockito.mock(UfsSyncPathCache.class); + mAbsentCache = Mockito.mock(UfsAbsentPathCache.class); + mTaskTracker = new TaskTracker( + 1, 1, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + DefaultFileSystemMaster defaultFileSystemMaster = Mockito.mock(DefaultFileSystemMaster.class); + Mockito.when(defaultFileSystemMaster.createJournalContext()) + .thenReturn(NoopJournalContext.INSTANCE); + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, defaultFileSystemMaster, null); + } + + @After + public void after() throws Throwable { + assertFalse(mTaskTracker.hasRunningTasks()); + mTaskTracker.close(); + mThreadPool.shutdown(); + } + + void checkStats( + TaskStats stats, int batches, int statuses, int loadErrors, + int loadRequests, boolean loadFailed, boolean processFailed, + boolean firstLoadWasFile) { + if (batches 
>= 0) { + assertEquals(batches, stats.getBatchCount()); + } + if (statuses >= 0) { + assertEquals(statuses, stats.getStatusCount()); + } + if (loadErrors >= 0) { + assertEquals(loadErrors, stats.getLoadErrors()); + } + if (loadRequests >= 0) { + assertEquals(loadRequests, stats.getLoadRequestCount()); + } + assertEquals(loadFailed, stats.isLoadFailed()); + assertEquals(processFailed, stats.isProcessFailed()); + assertEquals(firstLoadWasFile, stats.firstLoadWasFile()); + } + + @Test + public void rateLimitedTest() throws Throwable { + // Be sure ufs loads, and result processing can happen concurrently + int concurrentUfsLoads = 2; + int totalBatches = 10; + int concurrentProcessing = 5; + AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches); + final AtomicLong time = new AtomicLong(0); + long permitsPerSecond = 100000; + long timePerPermit = Duration.ofSeconds(1).toNanos() / permitsPerSecond; + // add a rate limiter + Semaphore rateLimiterBlocker = new Semaphore(0); + SimpleRateLimiter rateLimiter = Mockito.spy( + new SimpleRateLimiter(permitsPerSecond, new Ticker() { + @Override + public long read() { + return time.get(); + } + })); + Mockito.doAnswer(ans -> { + Object result = ans.callRealMethod(); + // after acquiring a permit, we let the main thread know + // by increasing the semaphore + rateLimiterBlocker.release(); + return result; + }).when(rateLimiter).acquire(); + mUfsClient.setRateLimiter(rateLimiter); + mTaskTracker.close(); + mTaskTracker = new TaskTracker( + concurrentProcessing, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler( + mTaskTracker, mMetadataSyncHandler.mFsMaster, null); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = remainingLoadCount.decrementAndGet(); + boolean truncated = nxtItem > 0; + return new Pair<>(Stream.of(mFileStatus), truncated); + }); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + + for (int i = 0; i < 10; i++) { + remainingLoadCount.set(totalBatches); + + // move the time forward, and take a rate limit permit + // so that any new task will be blocked + time.addAndGet(timePerPermit); + rateLimiter.acquire(); + rateLimiterBlocker.acquire(); + + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.SINGLE_LISTING)); + + for (int j = 0; j < totalBatches; j++) { + int finalJ = j; + CommonUtils.waitForResult("Rate limited listStatus", remainingLoadCount::get, + v -> v == totalBatches - finalJ, + // wait for the next listStatus call to get its rate limiter permit + WaitForOptions.defaults().setTimeoutMs(1000)); + rateLimiterBlocker.acquire(); + // allow the rate limited operation to succeed by moving the time forward + time.addAndGet(timePerPermit); + } + Pair result = task.get(); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + assertEquals(remainingLoadCount.get(), 0); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, totalBatches, totalBatches, 0, totalBatches, + false, false, true); + } + } + + @Test + public void concurrentProcessTest() throws Throwable { + // Be sure ufs loads, and result processing can happen concurrently + mTaskTracker.close(); + int concurrentUfsLoads = 5; + int totalBatches = 100; + int concurrentProcessing = 5; + mTaskTracker = new 
TaskTracker( + concurrentProcessing, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler( + mTaskTracker, mMetadataSyncHandler.mFsMaster, null); + AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches); + AtomicInteger processingCount = new AtomicInteger(0); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = remainingLoadCount.decrementAndGet(); + boolean truncated = nxtItem != 0; + return new Pair<>(Stream.of(mFileStatus), truncated); + }); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + + for (int i = 0; i < 100; i++) { + remainingLoadCount.set(totalBatches); + processingCount.set(0); + CountDownLatch blocker = new CountDownLatch(1); + Mockito.doAnswer(ans -> { + processingCount.incrementAndGet(); + // block the processing to ensure we have concurrent load requests + blocker.await(); + return ans.callRealMethod(); + }).when(mSyncProcess).performSync(any(), any()); + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.SINGLE_LISTING)); + CommonUtils.waitForResult("Concurrent load", remainingLoadCount::get, + v -> v == totalBatches - concurrentUfsLoads - concurrentProcessing, + WaitForOptions.defaults().setTimeoutMs(1000)); + CommonUtils.waitForResult("Concurrent processing", processingCount::get, + v -> v == concurrentProcessing, + WaitForOptions.defaults().setTimeoutMs(1000)); + // let the processing complete + blocker.countDown(); + Pair result = task.get(); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + assertEquals(remainingLoadCount.get(), 0); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 100, 100, 0, 100, false, false, true); + } + } + + @Test + public void concurrentDirProcessErrorTest() throws Throwable { + // Fail processing during concurrent ufs loading and processing when using load by directory + mTaskTracker.close(); + int concurrentUfsLoads = 5; + int totalBatches = 100; + int processError = 95; + int concurrentProcessing = 5; + AtomicInteger remainingProcessCount = new AtomicInteger(processError); + mTaskTracker = new TaskTracker( + concurrentProcessing, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + Mockito.doAnswer(ans -> { + if (remainingProcessCount.decrementAndGet() == 0) { + throw new IOException(); + } + return ans.callRealMethod(); + }).when(mSyncProcess).performSync(any(), any()); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, null, null); + for (int i = 0; i < 100; i++) { + for (DirectoryLoadType loadType + : ImmutableList.of(DirectoryLoadType.DFS, DirectoryLoadType.BFS)) { + AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches); + remainingProcessCount.set(processError); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = remainingLoadCount.decrementAndGet(); + boolean truncated = nxtItem > 0; + return new Pair<>(Stream.of(mFileStatus, mDirStatus), truncated); + }); + + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, loadType)); + Pair result = 
task.get();
+        assertThrows(IOException.class, () -> result.getSecond().waitComplete(WAIT_TIMEOUT));
+        assertFalse(result.getSecond().succeeded());
+        TaskStats stats = result.getSecond().getTaskInfo().getStats();
+        checkStats(stats, -1, -1, -1, -1, false, true, true);
+      }
+    }
+  }
+
+  @Test
+  public void concurrentDirLoadErrorTest() throws Throwable {
+    // Fail loading during concurrent ufs loading and processing
+    mTaskTracker.close();
+    int concurrentUfsLoads = 5;
+    int totalBatches = 100;
+    int concurrentProcessing = 5;
+    mTaskTracker = new TaskTracker(
+        concurrentProcessing, concurrentUfsLoads, false, false,
+        mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient);
+    Mockito.doReturn(SyncCheck.shouldSyncWithTime(0))
+        .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any());
+    AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches);
+    mUfsClient.setListingResultFunc(path -> {
+      int nxtItem = remainingLoadCount.decrementAndGet();
+      boolean truncated = nxtItem > 0;
+      if (truncated) {
+        return new Pair<>(Stream.of(mFileStatus, mDirStatus), true);
+      } else {
+        throw new RuntimeException();
+      }
+    });
+
+    mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, null, null);
+    for (int i = 0; i < 100; i++) {
+      for (DirectoryLoadType loadType
+          : ImmutableList.of(DirectoryLoadType.DFS, DirectoryLoadType.BFS)) {
+        remainingLoadCount.set(totalBatches);
+        Future<Pair<Boolean, BaseTask>> task = mThreadPool.submit(() ->
+            mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"),
+                new AlluxioURI("/"), null,
+                DescendantType.ALL, 0, loadType));
+        Pair<Boolean, BaseTask> result = task.get();
+        assertFalse(result.getFirst());
+        assertThrows(RuntimeException.class, () -> result.getSecond().waitComplete(WAIT_TIMEOUT));
+        TaskStats stats = result.getSecond().getTaskInfo().getStats();
+        checkStats(stats, -1, -1, -1, -1, true, false, true);
+      }
+    }
+  }
+
+  @Test
+  public void concurrentDirLoadTest() throws Throwable {
+    // Load nested directories concurrently with no errors
+    mTaskTracker.close();
+    int concurrentUfsLoads = 5;
+    int totalBatches = 100;
+    int concurrentProcessing = 5;
+    mTaskTracker = new TaskTracker(
+        concurrentProcessing, concurrentUfsLoads, false, false,
+        mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient);
+    mMetadataSyncHandler = new MetadataSyncHandler(
+        mTaskTracker, mMetadataSyncHandler.mFsMaster, null);
+    Mockito.doReturn(SyncCheck.shouldSyncWithTime(0))
+        .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any());
+    AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches);
+    mUfsClient.setListingResultFunc(path -> {
+      int nxtItem = remainingLoadCount.decrementAndGet();
+      boolean truncated = nxtItem > 0;
+      if (truncated) {
+        return new Pair<>(Stream.of(mFileStatus, mDirStatus), true);
+      } else {
+        return new Pair<>(Stream.of(mFileStatus), false);
+      }
+    });
+
+    for (int i = 0; i < 100; i++) {
+      for (DirectoryLoadType loadType
+          : ImmutableList.of(DirectoryLoadType.DFS, DirectoryLoadType.BFS)) {
+        remainingLoadCount.set(totalBatches);
+
+        Future<Pair<Boolean, BaseTask>> task = mThreadPool.submit(() ->
+            mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"),
+                new AlluxioURI("/"), null,
+                DescendantType.ALL, 0, loadType));
+        Pair<Boolean, BaseTask> result = task.get();
+        assertTrue(result.getFirst());
+        result.getSecond().waitComplete(WAIT_TIMEOUT);
+        TaskStats stats = result.getSecond().getTaskInfo().getStats();
+        checkStats(stats, -1, -1, 0, -1, false, false, true);
+      }
+    }
+  }
+
+  @Test
+  public void concurrentProcessErrorTest() throws Throwable {
+    // Fail processing during
concurrent ufs loading and processing + mTaskTracker.close(); + int concurrentUfsLoads = 5; + int totalBatches = 100; + int batchFailureNumber = 50; + int concurrentProcessing = 5; + mTaskTracker = new TaskTracker( + concurrentProcessing, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, null, null); + AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches); + AtomicInteger processingCount = new AtomicInteger(0); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = remainingLoadCount.decrementAndGet(); + boolean truncated = nxtItem != 0; + return new Pair<>(Stream.of(mFileStatus), truncated); + }); + Mockito.doAnswer(ans -> { + if (processingCount.incrementAndGet() == batchFailureNumber) { + throw new IOException(); + } + return ans.callRealMethod(); + }).when(mSyncProcess).performSync(any(), any()); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + + for (int i = 0; i < 100; i++) { + remainingLoadCount.set(totalBatches); + processingCount.set(0); + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.SINGLE_LISTING)); + Pair result = task.get(); + assertFalse(result.getFirst()); + assertThrows(IOException.class, () -> result.getSecond().waitComplete(WAIT_TIMEOUT)); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, -1, -1, 0, -1, false, true, true); + } + } + + @Test + public void concurrentLoadErrorTest() throws Throwable { + // Fail processing during concurrent ufs loading and processing + mTaskTracker.close(); + int concurrentUfsLoads = 5; + int totalBatches = 100; + int loadFailNumber = 50; + int concurrentProcessing = 5; + mTaskTracker = new TaskTracker( + concurrentProcessing, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, null, null); + AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = remainingLoadCount.decrementAndGet(); + if (nxtItem <= loadFailNumber) { + throw new RuntimeException(); + } + return new Pair<>(Stream.of(mFileStatus), true); + }); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + + for (int i = 0; i < 100; i++) { + remainingLoadCount.set(totalBatches); + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.SINGLE_LISTING)); + Pair result = task.get(); + assertFalse(result.getFirst()); + assertThrows(RuntimeException.class, () -> result.getSecond().waitComplete(WAIT_TIMEOUT)); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, -1, -1, 4, -1, true, false, true); + } + } + + @Test + public void concurrentLoadTest() throws Throwable { + // be sure loads can happen concurrently + mTaskTracker.close(); + int concurrentUfsLoads = 5; + int totalBatches = 100; + mTaskTracker = new TaskTracker( + 1, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, + mMetadataSyncHandler.mFsMaster, 
null); + AtomicInteger count = new AtomicInteger(totalBatches); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = count.decrementAndGet(); + boolean truncated = nxtItem != 0; + return new Pair<>(Stream.of(mFileStatus), truncated); + }); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + + for (int i = 0; i < 100; i++) { + count.set(totalBatches); + CountDownLatch blocker = new CountDownLatch(1); + Mockito.doAnswer(ans -> { + // block the processing to ensure we have concurrent load requests + blocker.await(); + return ans.callRealMethod(); + }).when(mSyncProcess).performSync(any(), any()); + + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.SINGLE_LISTING)); + CommonUtils.waitForResult("Concurrent load", count::get, + v -> v == totalBatches - concurrentUfsLoads - 1, + WaitForOptions.defaults().setTimeoutMs(1000)); + // let the processing complete + blocker.countDown(); + Pair result = task.get(); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + assertEquals(count.get(), 0); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 100, 100, 0, 100, false, false, true); + } + } + + @Test + public void dirLoadTest() throws Throwable { + // Load nested directories one level at a time in different batch requests + mUfsClient.setListingResultFunc(path -> { + if (path.equals("/")) { + return new Pair<>(Stream.of(mFileStatus, mDirStatus), false); + } else if (path.equals("/dir")) { + return new Pair<>(Stream.of(mFileStatus, mFileStatus), false); + } else { + throw new RuntimeException("should not reach"); + } + }); + + for (int i = 0; i < 100; i++) { + // Use load type BFS, there should be a load task for both / and /dir + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + Pair result = mTaskTracker.checkTask(mMetadataSyncHandler, + new AlluxioURI("/"), new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.BFS); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 2, 4, 0, 2, false, false, true); + + // run the same request, except have the sync for the nested directory not be needed + Mockito.doReturn(SyncCheck.shouldNotSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + result = mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.BFS); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 1, 2, 0, 1, false, false, true); + } + } + + @Test + public void basicSyncTest() throws Throwable { + for (int i = 0; i < 100; i++) { + mUfsClient.setResult(Collections.singletonList(Stream.of(mFileStatus)).iterator()); + Pair result = mTaskTracker.checkTask(mMetadataSyncHandler, + new AlluxioURI("/"), new AlluxioURI("/"), null, + DescendantType.ONE, 0, DirectoryLoadType.SINGLE_LISTING); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 1, 1, 0, 1, false, false, true); + } + } + + @Test + public void 
multiBatchTest() throws Throwable { + // load a directory of 2 batches of size 1 + for (int i = 0; i < 100; i++) { + mUfsClient.setResult(ImmutableList.of(Stream.of(mFileStatus), + Stream.of(mFileStatus)).iterator()); + Pair result = mTaskTracker.checkTask(mMetadataSyncHandler, + new AlluxioURI("/"), new AlluxioURI("/"), null, + DescendantType.ONE, 0, DirectoryLoadType.SINGLE_LISTING); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 2, 2, 0, 2, false, false, true); + } + } + + @Test + public void loadErrorTest() throws Throwable { + // Ufs loads return errors until failure + for (int i = 0; i < 100; i++) { + mUfsClient.setError(new Throwable()); + Pair result = mTaskTracker.checkTask(mMetadataSyncHandler, + new AlluxioURI("/"), new AlluxioURI("/"), null, + DescendantType.ONE, 0, DirectoryLoadType.SINGLE_LISTING); + assertFalse(result.getFirst()); + assertThrows(Throwable.class, () -> result.getSecond().waitComplete(WAIT_TIMEOUT)); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 0, 0, 4, 1, true, false, false); + } + } + + @Test + public void loadErrorRetryTest() throws Throwable { + int totalBatches = 100; + // Error on the first load, but let the next succeed + AtomicInteger count = new AtomicInteger(totalBatches); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = count.decrementAndGet(); + boolean truncated = nxtItem != 0; + if (truncated && nxtItem % 2 == 0) { + throw new RuntimeException(); + } + return new Pair<>(Stream.of(mFileStatus), truncated); + }); + for (int i = 0; i < 100; i++) { + count.set(totalBatches); + Pair result = mTaskTracker.checkTask(mMetadataSyncHandler, + new AlluxioURI("/"), new AlluxioURI("/"), null, DescendantType.ONE, 0, + DirectoryLoadType.SINGLE_LISTING); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + int amount = totalBatches / 2; + checkStats(stats, amount + 1, amount + 1, amount - 1, amount + 1, false, false, true); + } + } + + @Test + public void processErrorTest() throws Throwable { + // An error happens during processing + for (int i = 0; i < 100; i++) { + mUfsClient.setResult(ImmutableList.of(Stream.of(mFileStatus), + Stream.of(mFileStatus)).iterator()); + Mockito.doThrow(new IOException()).when(mSyncProcess).performSync(any(), any()); + Pair result = mTaskTracker.checkTask(mMetadataSyncHandler, + new AlluxioURI("/"), new AlluxioURI("/"), null, + DescendantType.ONE, 0, DirectoryLoadType.SINGLE_LISTING); + assertFalse(result.getFirst()); + assertThrows(IOException.class, () -> result.getSecond().waitComplete(WAIT_TIMEOUT)); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, -1, -1, 0, 2, false, true, true); + } + } + + @Test + public void blockingSyncTest() throws Throwable { + // run two concurrent processing syncing on the same path + // be sure one is blocked and they both succeed + for (int i = 0; i < 2; i++) { + mUfsClient.setResult(Collections.singletonList(Stream.of(mFileStatus)).iterator()); + Semaphore blocker = new Semaphore(0); + Mockito.doAnswer(ans -> { + // block the processing of any task + blocker.acquire(); + return ans.callRealMethod(); + }).when(mSyncProcess).performSync(any(), any()); + // Submit two concurrent tasks on the same path + Future> task1 = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new 
AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ONE, 0, DirectoryLoadType.SINGLE_LISTING)); + assertThrows(TimeoutException.class, () -> task1.get(1, TimeUnit.SECONDS)); + Future> task2 = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ONE, 0, DirectoryLoadType.SINGLE_LISTING)); + assertThrows(TimeoutException.class, () -> task2.get(1, TimeUnit.SECONDS)); + // Let one task be processed + blocker.release(); + // Only one task should have been executed, but both should finish since they + // were on the same path + assertTrue(task1.get().getFirst()); + assertTrue(task2.get().getFirst()); + TaskStats stats1 = task1.get().getSecond().getTaskInfo().getStats(); + checkStats(stats1, 1, 1, 0, 1, false, false, true); + TaskStats stats2 = task2.get().getSecond().getTaskInfo().getStats(); + checkStats(stats2, 1, 1, 0, 1, false, false, true); + } + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/TestSyncProcessor.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/TestSyncProcessor.java new file mode 100644 index 000000000000..7c6a7cdcc808 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/TestSyncProcessor.java @@ -0,0 +1,101 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.exception.AccessControlException; +import alluxio.exception.BlockInfoException; +import alluxio.exception.DirectoryNotEmptyException; +import alluxio.exception.FileAlreadyExistsException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.file.meta.InodeIterationResult; +import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.MountTable; +import alluxio.master.file.meta.UfsAbsentPathCache; +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.master.metastore.ReadOnlyInodeStore; + +import java.io.IOException; +import java.util.concurrent.Semaphore; +import javax.annotation.Nullable; + +/** + * The metadata syncer. 
+ */ +public class TestSyncProcessor extends DefaultSyncProcess { + @FunctionalInterface + public interface Callback { + void apply() throws Exception; + } + + @FunctionalInterface + public interface SyncOneCallback { + void apply(SyncProcessContext context) throws Exception; + } + + public TestSyncProcessor(DefaultFileSystemMaster fsMaster, ReadOnlyInodeStore inodeStore, + MountTable mountTable, InodeTree inodeTree, + UfsSyncPathCache syncPathCache, UfsAbsentPathCache absentPathCache) { + super(fsMaster, inodeStore, mountTable, inodeTree, syncPathCache, absentPathCache); + } + + Semaphore mLock = new Semaphore(0); + private int mBlockOnNth = -1; + private int mSyncCount = 0; + private Callback mCallback = null; + private SyncOneCallback mCallbackBeforePerformSyncOne = null; + + @Override + protected SingleInodeSyncResult performSyncOne(SyncProcessState syncState, + @Nullable UfsItem currentUfsStatus, + @Nullable InodeIterationResult currentInode) + throws InvalidPathException, FileDoesNotExistException, FileAlreadyExistsException, + IOException, BlockInfoException, DirectoryNotEmptyException, AccessControlException { + if (mCallbackBeforePerformSyncOne != null) { + try { + mCallbackBeforePerformSyncOne.apply(syncState.mContext); + } catch (Exception e) { + throw new RuntimeException(); + } + } + mSyncCount++; + if (mSyncCount == mBlockOnNth && mCallback != null) { + try { + mCallback.apply(); + } catch (Exception e) { + throw new RuntimeException(); + } + mLock.release(); + } + return super.performSyncOne(syncState, currentUfsStatus, currentInode); + } + + public synchronized void beforePerformSyncOne(SyncOneCallback callback) + throws InterruptedException { + mCallbackBeforePerformSyncOne = callback; + } + + /** + * Blocks the current thread until the nth inode sync (root included) is ABOUT TO execute, + * executes the callback and resumes the sync. + * Used for testing concurrent modifications. + * @param nth the inode sync count + * @param callback the callback to execute + */ + public synchronized void blockUntilNthSyncThenDo(int nth, Callback callback) + throws InterruptedException { + mBlockOnNth = nth; + mCallback = callback; + mLock.acquire(); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/UfsLoadsTest.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/UfsLoadsTest.java new file mode 100644 index 000000000000..932ba9dbf94d --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/UfsLoadsTest.java @@ -0,0 +1,88 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
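TestSyncProcessor's blockUntilNthSyncThenDo is a compact recipe for injecting a concurrent modification at an exact point inside a long-running operation: count invocations in an overridden method, run the callback just before the nth one, then release a permit so the blocked test thread resumes. The same shape in isolation, with Worker/processOne as hypothetical stand-ins for the real sync machinery (a single worker thread is assumed, as in the class above):

```java
import java.util.concurrent.Semaphore;

class Worker {
  void processOne(int item) { /* real work would happen here */ }
}

class HookedWorker extends Worker {
  private final Semaphore mReached = new Semaphore(0);
  private volatile Runnable mCallback;
  private volatile int mFireOnNth = -1;
  private int mCount;

  /** Blocks the caller until the worker is about to perform its nth call. */
  void blockUntilNthThenDo(int nth, Runnable callback) throws InterruptedException {
    mFireOnNth = nth;
    mCallback = callback;
    mReached.acquire(); // released from processOne once the hook has run
  }

  @Override
  void processOne(int item) {
    mCount++;
    if (mCount == mFireOnNth && mCallback != null) {
      mCallback.run();    // e.g. mutate shared state the operation will observe next
      mReached.release(); // unblock the thread waiting in blockUntilNthThenDo
    }
    super.processOne(item);
  }
}
```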
+ */ + +package alluxio.master.file.mdsync; + +import static org.junit.Assert.assertFalse; +import static org.mockito.ArgumentMatchers.any; + +import alluxio.AlluxioURI; +import alluxio.master.file.meta.UfsAbsentPathCache; +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsDirectoryStatus; +import alluxio.underfs.UfsFileStatus; +import alluxio.underfs.UfsStatus; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +public class UfsLoadsTest { + ExecutorService mThreadPool; + TaskTracker mTaskTracker; + MetadataSyncHandler mMetadataSyncHandler; + MockUfsClient mUfsClient; + UfsSyncPathCache mUfsSyncPathCache; + UfsAbsentPathCache mAbsentPathCache; + SyncProcess mSyncProcess; + List mProcessedItems; + UfsStatus mFileStatus = new UfsFileStatus("file", "", + 0L, 0L, "", "", (short) 0, 0L); + UfsStatus mDirStatus = new UfsDirectoryStatus("dir", "", "", (short) 0); + static final long WAIT_TIMEOUT = 5_000; + + private CloseableResource getClient(AlluxioURI ignored) { + return new CloseableResource(mUfsClient) { + @Override + public void closeResource() { + } + }; + } + + @Before + public void before() throws Throwable { + mThreadPool = Executors.newCachedThreadPool(); + mUfsClient = Mockito.spy(new MockUfsClient()); + mSyncProcess = Mockito.spy(new DummySyncProcess()); + mProcessedItems = new ArrayList<>(); + Mockito.doAnswer(ans -> { + LoadResult result = ans.getArgument(0); + result.getUfsLoadResult().getItems().peek(mProcessedItems::add); + return ans.callRealMethod(); + }).when(mSyncProcess).performSync(any(LoadResult.class), any(UfsSyncPathCache.class)); + mAbsentPathCache = Mockito.mock(UfsAbsentPathCache.class); + mUfsSyncPathCache = Mockito.mock(UfsSyncPathCache.class); + mTaskTracker = new TaskTracker( + 1, 1, false, false, + mUfsSyncPathCache, mAbsentPathCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, null, null); + } + + @After + public void after() throws Throwable { + assertFalse(mTaskTracker.hasRunningTasks()); + mTaskTracker.close(); + mThreadPool.shutdown(); + } + + @Test + public void singleFileSync() { + mUfsClient.setGetStatusItem(mFileStatus); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/meta/CheckpointedIdHashSetTest.java b/core/server/master/src/test/java/alluxio/master/file/meta/CheckpointedIdHashSetTest.java new file mode 100644 index 000000000000..c7ed06f3ba25 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/meta/CheckpointedIdHashSetTest.java @@ -0,0 +1,63 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
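The doAnswer in UfsLoadsTest above observes loaded items by peeking the result stream. One caveat worth remembering with this style of spying: Stream.peek is a lazy intermediate operation, so the observer only runs if the peeked (derived) stream is the one that ends up terminally consumed; peeking and then discarding the derived stream records nothing. A small demonstration of the difference:

```java
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class PeekDemo {
  public static void main(String[] args) {
    List<String> seen = new ArrayList<>();

    Stream<String> source = Stream.of("a", "b");
    source.peek(seen::add);   // derived stream is discarded, never consumed...
    System.out.println(seen); // ...so nothing was observed: []

    List<String> out = Stream.of("a", "b")
        .peek(seen::add)      // consumed by collect, so peek runs per element
        .collect(Collectors.toList());
    System.out.println(seen); // [a, b]
    System.out.println(out);  // [a, b]
  }
}
```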
+ */ + +package alluxio.master.file.meta; + +import alluxio.master.journal.checkpoint.CheckpointInputStream; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +@RunWith(Parameterized.class) +public class CheckpointedIdHashSetTest { + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new PinnedInodeFileIds(), new ReplicationLimitedFileIds(), + new ToBePersistedFileIds()); + } + + @Parameterized.Parameter + public CheckpointedIdHashSet mIdHashSet; + + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + @Test + public void test() throws IOException { + for (long i = 0L; i < 1_000_000L; i += 5762L) { + mIdHashSet.add(i); + } + List copyList = new ArrayList<>(mIdHashSet); + File file = mFolder.newFile(); + try (OutputStream outputStream = Files.newOutputStream(file.toPath())) { + mIdHashSet.writeToCheckpoint(outputStream); + } + mIdHashSet.clear(); + try (CheckpointInputStream inputStream = + new CheckpointInputStream(Files.newInputStream(file.toPath()))) { + mIdHashSet.restoreFromCheckpoint(inputStream); + } + Assert.assertTrue(mIdHashSet.containsAll(copyList)); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/meta/InodeTreeTest.java b/core/server/master/src/test/java/alluxio/master/file/meta/InodeTreeTest.java index 0c87807e2ff9..1ca5eb9ab5ab 100644 --- a/core/server/master/src/test/java/alluxio/master/file/meta/InodeTreeTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/meta/InodeTreeTest.java @@ -82,6 +82,8 @@ import java.util.Set; import java.util.Spliterator; import java.util.Spliterators; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -113,6 +115,8 @@ public final class InodeTreeTest { private MasterRegistry mRegistry; private MetricsMaster mMetricsMaster; + private ExecutorService mThreadPool; + @Parameters public static Iterable> parameters() throws Exception { String dir = @@ -163,12 +167,14 @@ public void before() throws Exception { mRegistry.start(true); mTree.initializeRoot(TEST_OWNER, TEST_GROUP, TEST_DIR_MODE, NoopJournalContext.INSTANCE); + mThreadPool = Executors.newCachedThreadPool(); } @After public void after() throws Exception { mRegistry.stop(); mInodeStore.close(); + mThreadPool.shutdown(); } /** diff --git a/core/server/master/src/test/java/alluxio/master/file/meta/LockedInodePathTest.java b/core/server/master/src/test/java/alluxio/master/file/meta/LockedInodePathTest.java index b071694b5824..d56512ee55d5 100644 --- a/core/server/master/src/test/java/alluxio/master/file/meta/LockedInodePathTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/meta/LockedInodePathTest.java @@ -24,6 +24,7 @@ import alluxio.exception.InvalidPathException; import alluxio.exception.status.UnavailableException; import alluxio.master.file.meta.InodeTree.LockPattern; +import alluxio.master.journal.FileSystemMergeJournalContext; import alluxio.master.journal.JournalContext; import alluxio.master.journal.NoopJournalContext; @@ -598,7 +599,7 @@ public void lockFinalEdgeWriteAlreadyLocked() 
throws Exception { @Test public void testFlushJournal() throws InvalidPathException, UnavailableException { AtomicInteger journalFlushCount = new AtomicInteger(); - JournalContext journalContext = mock(JournalContext.class); + JournalContext journalContext = mock(FileSystemMergeJournalContext.class); Mockito.doAnswer( (mock) -> { journalFlushCount.getAndIncrement(); diff --git a/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketListTest.java b/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketListTest.java index 2570719a13a3..2959df2fddb5 100644 --- a/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketListTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketListTest.java @@ -16,7 +16,6 @@ import alluxio.master.metastore.InodeStore; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import org.junit.Assert; import org.junit.Before; import org.junit.ClassRule; @@ -24,6 +23,7 @@ import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; /** * Unit tests for {@link TtlBucketList}. @@ -54,8 +54,8 @@ public void before() { mBucketList = new TtlBucketList(mock(InodeStore.class)); } - private List getSortedExpiredBuckets(long expireTime) { - List buckets = Lists.newArrayList(mBucketList.getExpiredBuckets(expireTime)); + private List pollSortedExpiredBuckets(long expireTime) { + List buckets = Lists.newArrayList(mBucketList.pollExpiredBuckets(expireTime)); Collections.sort(buckets); return buckets; } @@ -63,8 +63,10 @@ private List getSortedExpiredBuckets(long expireTime) { private void assertExpired(List expiredBuckets, int bucketIndex, Inode... inodes) { TtlBucket bucket = expiredBuckets.get(bucketIndex); - Assert.assertEquals(inodes.length, bucket.getInodes().size()); - Assert.assertTrue(bucket.getInodes().containsAll(Lists.newArrayList(inodes))); + Assert.assertEquals(inodes.length, bucket.size()); + List inodeIds = Lists.newArrayList(inodes).stream().map(Inode::getId) + .collect(Collectors.toList()); + Assert.assertTrue(bucket.getInodeIds().containsAll(inodeIds)); } /** @@ -73,24 +75,27 @@ private void assertExpired(List expiredBuckets, int bucketIndex, @Test public void insert() { // No bucket should expire. - List expired = getSortedExpiredBuckets(BUCKET1_START); + List expired = pollSortedExpiredBuckets(BUCKET1_START); Assert.assertTrue(expired.isEmpty()); mBucketList.insert(BUCKET1_FILE1); // The first bucket should expire. - expired = getSortedExpiredBuckets(BUCKET1_END); + expired = pollSortedExpiredBuckets(BUCKET1_END); assertExpired(expired, 0, BUCKET1_FILE1); + mBucketList.insert(BUCKET1_FILE1); mBucketList.insert(BUCKET1_FILE2); // Only the first bucket should expire. for (long end = BUCKET2_START; end < BUCKET2_END; end++) { - expired = getSortedExpiredBuckets(end); + expired = pollSortedExpiredBuckets(end); assertExpired(expired, 0, BUCKET1_FILE1, BUCKET1_FILE2); + mBucketList.insert(BUCKET1_FILE1); + mBucketList.insert(BUCKET1_FILE2); } mBucketList.insert(BUCKET2_FILE); // All buckets should expire. 
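The TtlBucketListTest changes in this hunk track an API shift from a read-only getExpiredBuckets to a destructive pollExpiredBuckets, which is why the rewritten test re-inserts files after each poll before asserting again. A toy model of poll semantics, with TtlRegistry as an illustrative name only, not the Alluxio class:

```java
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.concurrent.ConcurrentSkipListMap;

class TtlRegistry {
  // expiry timestamp -> ids expiring at that time
  private final ConcurrentSkipListMap<Long, List<Long>> mBuckets = new ConcurrentSkipListMap<>();

  void insert(long expiresAt, long inodeId) {
    mBuckets.computeIfAbsent(expiresAt, k -> new ArrayList<>()).add(inodeId);
  }

  /** Destructive read: expired buckets are removed as they are returned. */
  List<List<Long>> pollExpired(long now) {
    List<List<Long>> expired = new ArrayList<>();
    ConcurrentNavigableMap<Long, List<Long>> head = mBuckets.headMap(now, true);
    while (!head.isEmpty()) {
      expired.add(head.pollFirstEntry().getValue());
    }
    return expired;
  }
}
```

Polling the same timestamp twice returns nothing the second time, which is exactly the property the updated assertions depend on.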
- expired = getSortedExpiredBuckets(BUCKET2_END); + expired = pollSortedExpiredBuckets(BUCKET2_END); assertExpired(expired, 0, BUCKET1_FILE1, BUCKET1_FILE2); assertExpired(expired, 1, BUCKET2_FILE); } @@ -104,39 +109,28 @@ public void remove() { mBucketList.insert(BUCKET1_FILE2); mBucketList.insert(BUCKET2_FILE); - List expired = getSortedExpiredBuckets(BUCKET1_END); + List expired = pollSortedExpiredBuckets(BUCKET1_END); assertExpired(expired, 0, BUCKET1_FILE1, BUCKET1_FILE2); + mBucketList.insert(BUCKET1_FILE1); + mBucketList.insert(BUCKET1_FILE2); mBucketList.remove(BUCKET1_FILE1); - expired = getSortedExpiredBuckets(BUCKET1_END); + expired = pollSortedExpiredBuckets(BUCKET1_END); // Only the first bucket should expire, and there should be only one BUCKET1_FILE2 in it. assertExpired(expired, 0, BUCKET1_FILE2); + mBucketList.insert(BUCKET1_FILE2); mBucketList.remove(BUCKET1_FILE2); - expired = getSortedExpiredBuckets(BUCKET1_END); + expired = pollSortedExpiredBuckets(BUCKET1_END); // Only the first bucket should expire, and there should be no files in it. assertExpired(expired, 0); // nothing in bucket 0. - expired = getSortedExpiredBuckets(BUCKET2_END); - // All buckets should expire. - assertExpired(expired, 0); // nothing in bucket 0. - assertExpired(expired, 1, BUCKET2_FILE); - - // Remove bucket 0. - expired = getSortedExpiredBuckets(BUCKET1_END); - mBucketList.removeBuckets(Sets.newHashSet(expired)); - - expired = getSortedExpiredBuckets(BUCKET2_END); - // The only remaining bucket is bucket 1, it should expire. + expired = pollSortedExpiredBuckets(BUCKET2_END); + // Current bucket should expire. assertExpired(expired, 0, BUCKET2_FILE); - mBucketList.remove(BUCKET2_FILE); - expired = getSortedExpiredBuckets(BUCKET2_END); - assertExpired(expired, 0); // nothing in bucket. - - mBucketList.removeBuckets(Sets.newHashSet(expired)); // No bucket should exist now. - expired = getSortedExpiredBuckets(BUCKET2_END); + expired = pollSortedExpiredBuckets(BUCKET2_END); Assert.assertEquals(0, expired.size()); } } diff --git a/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketTest.java b/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketTest.java index 29b6a5081fe3..0a035918d4ce 100644 --- a/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketTest.java @@ -74,25 +74,35 @@ public void compareIntervalStartTime() { public void addAndRemoveInodeFile() { Inode fileTtl1 = TtlTestUtils.createFileWithIdAndTtl(0, 1); Inode fileTtl2 = TtlTestUtils.createFileWithIdAndTtl(1, 2); - Assert.assertTrue(mBucket.getInodes().isEmpty()); + Assert.assertTrue(mBucket.getInodeIds().isEmpty()); mBucket.addInode(fileTtl1); - Assert.assertEquals(1, mBucket.getInodes().size()); + Assert.assertEquals(1, mBucket.size()); // The same file, won't be added. mBucket.addInode(fileTtl1); - Assert.assertEquals(1, mBucket.getInodes().size()); + Assert.assertEquals(1, mBucket.size()); // Different file, will be added. 
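The TtlBucket assertions around this point reflect a second shift: buckets now track inode ids paired with a remaining-retry value rather than inode objects, so membership checks go through getInodeIds and re-adding an id refreshes its retry budget without growing the bucket. A sketch of that shape; the constant's value and the exact semantics are assumptions mirroring the test, not the production class:

```java
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

class IdBucket {
  static final int DEFAULT_RETRY_ATTEMPTS = 5; // hypothetical default value

  // inode id -> remaining retry attempts
  private final Map<Long, Integer> mInodeExpiries = new ConcurrentHashMap<>();

  void addInode(long inodeId) {
    addInode(inodeId, DEFAULT_RETRY_ATTEMPTS);
  }

  void addInode(long inodeId, int retryAttempts) {
    // re-adding an id keeps size() stable but refreshes its retry budget,
    // matching the nearby addInode(fileTtl1, 2) assertions
    mInodeExpiries.put(inodeId, retryAttempts);
  }

  void removeInode(long inodeId) {
    mInodeExpiries.remove(inodeId);
  }

  int size() {
    return mInodeExpiries.size();
  }

  Set<Long> getInodeIds() {
    return mInodeExpiries.keySet();
  }
}
```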
mBucket.addInode(fileTtl2); - Assert.assertEquals(2, mBucket.getInodes().size()); + Assert.assertEquals(2, mBucket.size()); // Remove files; mBucket.removeInode(fileTtl1); - Assert.assertEquals(1, mBucket.getInodes().size()); - Assert.assertTrue(mBucket.getInodes().contains(fileTtl2)); + Assert.assertEquals(1, mBucket.size()); + Assert.assertTrue(mBucket.getInodeIds().contains(fileTtl2.getId())); mBucket.removeInode(fileTtl2); - Assert.assertEquals(0, mBucket.getInodes().size()); + Assert.assertEquals(0, mBucket.size()); + + // Retry attempts; + mBucket.addInode(fileTtl1); + Assert.assertTrue(mBucket.getInodeIds().contains(fileTtl1.getId())); + int retryAttempt = mBucket.getInodeExpiries().iterator().next().getValue(); + Assert.assertEquals(retryAttempt, TtlBucket.DEFAULT_RETRY_ATTEMPTS); + mBucket.addInode(fileTtl1, 2); + Assert.assertTrue(mBucket.getInodeIds().contains(fileTtl1.getId())); + int newRetryAttempt = mBucket.getInodeExpiries().iterator().next().getValue(); + Assert.assertEquals(newRetryAttempt, 2); } /** @@ -103,25 +113,25 @@ public void addAndRemoveInodeFile() { public void addAndRemoveInodeDirectory() { Inode directoryTtl1 = TtlTestUtils.createDirectoryWithIdAndTtl(0, 1); Inode directoryTtl2 = TtlTestUtils.createDirectoryWithIdAndTtl(1, 2); - Assert.assertTrue(mBucket.getInodes().isEmpty()); + Assert.assertTrue(mBucket.getInodeIds().isEmpty()); mBucket.addInode(directoryTtl1); - Assert.assertEquals(1, mBucket.getInodes().size()); + Assert.assertEquals(1, mBucket.size()); // The same directory, won't be added. mBucket.addInode(directoryTtl1); - Assert.assertEquals(1, mBucket.getInodes().size()); + Assert.assertEquals(1, mBucket.size()); // Different directory, will be added. mBucket.addInode(directoryTtl2); - Assert.assertEquals(2, mBucket.getInodes().size()); + Assert.assertEquals(2, mBucket.size()); // Remove directorys; mBucket.removeInode(directoryTtl1); - Assert.assertEquals(1, mBucket.getInodes().size()); - Assert.assertTrue(mBucket.getInodes().contains(directoryTtl2)); + Assert.assertEquals(1, mBucket.size()); + Assert.assertTrue(mBucket.getInodeIds().contains(directoryTtl2.getId())); mBucket.removeInode(directoryTtl2); - Assert.assertEquals(0, mBucket.getInodes().size()); + Assert.assertEquals(0, mBucket.getInodeIds().size()); } /** diff --git a/core/server/master/src/test/java/alluxio/master/file/replication/ReplicationCheckerTest.java b/core/server/master/src/test/java/alluxio/master/file/replication/ReplicationCheckerTest.java index 45c4db8333d1..f49c504db33c 100644 --- a/core/server/master/src/test/java/alluxio/master/file/replication/ReplicationCheckerTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/replication/ReplicationCheckerTest.java @@ -23,6 +23,7 @@ import alluxio.grpc.StorageList; import alluxio.job.plan.replicate.ReplicationHandler; import alluxio.job.wire.Status; +import alluxio.master.AlwaysPrimaryPrimarySelector; import alluxio.master.CoreMasterContext; import alluxio.master.MasterRegistry; import alluxio.master.MasterTestUtils; @@ -165,7 +166,8 @@ public void before() throws Exception { Configuration.set(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.UFS); MasterRegistry registry = new MasterRegistry(); JournalSystem journalSystem = JournalTestUtils.createJournalSystem(mTestFolder); - mContext = MasterTestUtils.testMasterContext(journalSystem); + mContext = MasterTestUtils.testMasterContext(journalSystem, + null, new AlwaysPrimaryPrimarySelector()); new MetricsMasterFactory().create(registry, mContext); mBlockMaster = new 
BlockMasterFactory().create(registry, mContext); InodeDirectoryIdGenerator directoryIdGenerator = new InodeDirectoryIdGenerator(mBlockMaster); @@ -279,7 +281,7 @@ private void heartbeatToAddLocationHelper(long blockId, long workerId) throws Ex @Test public void heartbeatWhenTreeIsEmpty() throws Exception { - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); } @@ -290,17 +292,17 @@ public void heartbeatFileWithinRange() throws Exception { createBlockHelper(TEST_FILE_1, mFileContext, ""); // One replica, meeting replication min addBlockLocationHelper(blockId, 1); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); // Two replicas, good heartbeatToAddLocationHelper(blockId, createWorkerHelper(1)); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); // Three replicas, meeting replication max, still good heartbeatToAddLocationHelper(blockId, createWorkerHelper(2)); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); } @@ -309,7 +311,7 @@ public void heartbeatFileUnderReplicatedBy1() throws Exception { mFileContext.getOptions().setReplicationMin(1); long blockId = createBlockHelper(TEST_FILE_1, mFileContext, ""); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected = ImmutableMap.of(blockId, 1); Assert.assertEquals(expected, mMockReplicationHandler.getSetReplicaRequests()); } @@ -320,7 +322,7 @@ public void heartbeatFileNeedsMove() throws Exception { long blockId = createBlockHelper(TEST_FILE_1, mFileContext, Constants.MEDIUM_SSD); addBlockLocationHelper(blockId, 1); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map> expected = ImmutableMap.of(blockId, new Pair<>("host0", Constants.MEDIUM_SSD)); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); @@ -333,7 +335,7 @@ public void heartbeatFileDoesnotNeedMove() throws Exception { long blockId = createBlockHelper(TEST_FILE_1, mFileContext, Constants.MEDIUM_MEM); addBlockLocationHelper(blockId, 1); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); Assert.assertEquals(EMPTY, mMockReplicationHandler.getMigrateRequests()); } @@ -343,7 +345,7 @@ public void heartbeatFileUnderReplicatedBy10() throws Exception { mFileContext.getOptions().setReplicationMin(10); long blockId = createBlockHelper(TEST_FILE_1, mFileContext, ""); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected = ImmutableMap.of(blockId, 10); Assert.assertEquals(expected, mMockReplicationHandler.getSetReplicaRequests()); } @@ -355,7 +357,7 @@ public void heartbeatMultipleFilesUnderReplicated() throws Exception { mFileContext.getOptions().setReplicationMin(2); long blockId2 = createBlockHelper(TEST_FILE_2, mFileContext, ""); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected = ImmutableMap.of(blockId1, 1, blockId2, 2); Assert.assertEquals(expected, mMockReplicationHandler.getSetReplicaRequests()); } @@ -380,7 +382,7 @@ public void 
heartbeatFileUnderReplicatedAndLost() throws Exception { ImmutableMap.of(Constants.MEDIUM_MEM, 0L), ImmutableList.of(blockId), NO_BLOCKS_ON_LOCATION, NO_LOST_STORAGE, NO_METRICS); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); } @@ -390,7 +392,7 @@ public void heartbeatFileOverReplicatedBy1() throws Exception { long blockId = createBlockHelper(TEST_FILE_1, mFileContext, ""); addBlockLocationHelper(blockId, 2); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected = ImmutableMap.of(blockId, 1); Assert.assertEquals(expected, mMockReplicationHandler.getSetReplicaRequests()); } @@ -401,7 +403,7 @@ public void heartbeatFileOverReplicatedBy10() throws Exception { long blockId = createBlockHelper(TEST_FILE_1, mFileContext, ""); addBlockLocationHelper(blockId, 11); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected = ImmutableMap.of(blockId, 1); Assert.assertEquals(expected, mMockReplicationHandler.getSetReplicaRequests()); } @@ -415,7 +417,7 @@ public void heartbeatMultipleFilesOverReplicated() throws Exception { addBlockLocationHelper(blockId1, 2); addBlockLocationHelper(blockId2, 4); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected = ImmutableMap.of(blockId1, 1, blockId2, 2); Assert.assertEquals(expected, mMockReplicationHandler.getSetReplicaRequests()); } @@ -429,7 +431,7 @@ public void heartbeatFilesUnderAndOverReplicated() throws Exception { addBlockLocationHelper(blockId1, 1); addBlockLocationHelper(blockId2, 5); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected1 = ImmutableMap.of(blockId1, 2, blockId2, 3); Assert.assertEquals(expected1, mMockReplicationHandler.getSetReplicaRequests()); } @@ -447,7 +449,7 @@ public void heartbeatPartial() throws Exception { addBlockLocationHelper(blockId2, 1); addBlockLocationHelper(blockId3, 1); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); final Map replicateRequests = mMockReplicationHandler.getSetReplicaRequests(); System.out.println(replicateRequests); Assert.assertEquals(2, replicateRequests.size()); @@ -457,11 +459,11 @@ public void heartbeatPartial() throws Exception { mMockReplicationHandler.setJobStatus(1, Status.RUNNING); mMockReplicationHandler.setJobStatus(2, Status.RUNNING); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(0, replicateRequests.size()); mMockReplicationHandler.setJobStatus(1, Status.FAILED); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(1, replicateRequests.size()); Assert.assertEquals(3, replicateRequests.values().toArray()[0]); @@ -471,7 +473,7 @@ public void heartbeatPartial() throws Exception { mMockReplicationHandler.setJobStatus(2, Status.COMPLETED); mMockReplicationHandler.setJobStatus(3, Status.COMPLETED); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(1, replicateRequests.size()); Assert.assertTrue(replicateRequests.containsKey(blockId3)); Assert.assertEquals(3, replicateRequests.values().toArray()[0]); diff --git a/core/server/master/src/test/java/alluxio/master/file/scheduler/FileIterableTest.java b/core/server/master/src/test/java/alluxio/master/file/scheduler/FileIterableTest.java new file mode 100644 index 
000000000000..67c8531ad6bd --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/scheduler/FileIterableTest.java @@ -0,0 +1,49 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.scheduler; + +import static org.junit.Assert.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; + +import alluxio.exception.AccessControlException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.exception.runtime.UnauthenticatedRuntimeException; +import alluxio.master.file.FileSystemMaster; +import alluxio.master.job.FileIterable; +import alluxio.master.job.LoadJob; + +import org.junit.Test; + +import java.io.IOException; +import java.util.Optional; + +public class FileIterableTest { + + @Test + public void testException() + throws FileDoesNotExistException, AccessControlException, IOException, InvalidPathException { + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + String path = "test"; + doThrow(new FileDoesNotExistException(path)).when(fileSystemMaster).checkAccess(any(), any()); + FileIterable fileIterable = new FileIterable(fileSystemMaster, path, Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + assertThrows(NotFoundRuntimeException.class, fileIterable::iterator); + doThrow(new InvalidPathException(path)).when(fileSystemMaster).checkAccess(any(), any()); + assertThrows(NotFoundRuntimeException.class, fileIterable::iterator); + doThrow(new AccessControlException(path)).when(fileSystemMaster).checkAccess(any(), any()); + assertThrows(UnauthenticatedRuntimeException.class, fileIterable::iterator); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadJobTest.java b/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadJobTest.java new file mode 100644 index 000000000000..7b1f6d521adc --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadJobTest.java @@ -0,0 +1,216 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
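FileIterableTest above verifies a translate-at-the-boundary rule: checked master exceptions (FileDoesNotExistException, InvalidPathException, AccessControlException) surface from the iterable as specific runtime exceptions. The same pattern is easy to test in isolation with doThrow-style stubbing and assertThrows; everything named here (MetadataSource, FileScanner, NotFoundRuntime) is an illustrative stand-in:

```java
import static org.junit.Assert.assertThrows;

import java.io.FileNotFoundException;
import org.junit.Test;

public class TranslationTest {
  static class NotFoundRuntime extends RuntimeException {
    NotFoundRuntime(Throwable cause) {
      super(cause);
    }
  }

  interface MetadataSource {
    void checkAccess(String path) throws FileNotFoundException;
  }

  static class FileScanner {
    private final MetadataSource mSource;

    FileScanner(MetadataSource source) {
      mSource = source;
    }

    /** Translates the checked exception so callers need no throws clause. */
    void open(String path) {
      try {
        mSource.checkAccess(path);
      } catch (FileNotFoundException e) {
        throw new NotFoundRuntime(e); // keep the cause for diagnostics
      }
    }
  }

  @Test
  public void checkedBecomesUnchecked() {
    MetadataSource source = path -> {
      throw new FileNotFoundException(path);
    };
    assertThrows(NotFoundRuntime.class, () -> new FileScanner(source).open("test"));
  }
}
```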
+ */ + +package alluxio.master.file.scheduler; + +import static alluxio.master.file.scheduler.LoadTestUtils.generateRandomFileInfo; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; + +import alluxio.Constants; +import alluxio.exception.AccessControlException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.grpc.Block; +import alluxio.grpc.JobProgressReportFormat; +import alluxio.master.file.FileSystemMaster; +import alluxio.master.file.contexts.ListStatusContext; +import alluxio.master.job.FileIterable; +import alluxio.master.job.LoadJob; +import alluxio.scheduler.job.JobState; +import alluxio.wire.FileInfo; + +import com.google.common.collect.ImmutableSet; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; +import java.util.OptionalLong; + +public class LoadJobTest { + @Test + public void testGetNextBatch() + throws FileDoesNotExistException, AccessControlException, IOException, InvalidPathException { + List fileInfos = generateRandomFileInfo(5, 20, 64 * Constants.MB); + + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + when(fileSystemMaster.listStatus(any(), any())).thenReturn(fileInfos); + String testPath = "test"; + Optional user = Optional.of("user"); + FileIterable files = + new FileIterable(fileSystemMaster, testPath, user, false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob load = + new LoadJob(testPath, user, "1", OptionalLong.empty(), false, false, files); + List batch = load.getNextBatchBlocks(10); + assertEquals(10, batch.size()); + assertEquals(1, batch.stream().map(Block::getUfsPath).distinct().count()); + + batch.forEach(load::addBlockToRetry); + + batch = load.getNextBatchBlocks(80); + assertEquals(80, batch.size()); + assertEquals(5, batch.stream().map(Block::getUfsPath).distinct().count()); + + batch = load.getNextBatchBlocks(80); + assertEquals(10, batch.size()); + assertEquals(1, batch.stream().map(Block::getUfsPath).distinct().count()); + + batch = load.getNextBatchBlocks(80); + assertEquals(10, batch.size()); + assertEquals(1, batch.stream().map(Block::getUfsPath).distinct().count()); + assertEquals(ImmutableSet.of(fileInfos.get(0).getUfsPath()), + batch.stream().map(Block::getUfsPath).collect(ImmutableSet.toImmutableSet())); + + batch = load.getNextBatchBlocks(80); + assertEquals(0, batch.size()); + } + + @Test + public void testGetNextBatchWithPartialListing() + throws FileDoesNotExistException, AccessControlException, IOException, InvalidPathException { + List fileInfos = generateRandomFileInfo(400, 2, 64 * Constants.MB); + + for (int i = 0; i < 100; i++) { + fileInfos.get(i).setInAlluxioPercentage(100); + } + for (int i = 200; i < 300; i++) { + fileInfos.get(i).setInAlluxioPercentage(100); + } + for (int i = 0; i < 10; i++) { + fileInfos.get(300 + i * i).setInAlluxioPercentage(100); + } + + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + when(fileSystemMaster.listStatus(any(), any())).thenAnswer(invocation -> { + ListStatusContext context = invocation.getArgument(1, ListStatusContext.class); + int fileSize = fileInfos.size(); + int from = 0; + int to = fileSize; + if 
(context.isPartialListing()) { + String startAfter = context.getPartialOptions().get().getStartAfter(); + int batch = context.getPartialOptions().get().getBatchSize(); + for (int i = 0; i < fileSize; i++) { + if (startAfter.equals(fileInfos.get(i).getPath())) { + from = i + 1; + break; + } + } + to = fileSize < from + batch ? fileSize : from + batch; + } + return fileInfos.subList(from, to); + }); + String testPath = "test"; + Optional<String> user = Optional.of("user"); + FileIterable files = + new FileIterable(fileSystemMaster, testPath, user, true, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob load = + new LoadJob(testPath, user, "1", OptionalLong.empty(), true, false, files); + + List<Block> batch = load.getNextBatchBlocks(100); + assertEquals(100, batch.size()); + assertEquals(50, batch.stream().map(Block::getUfsPath).distinct().count()); + + batch = load.getNextBatchBlocks(200); + assertEquals(200, batch.size()); + assertEquals(100, batch.stream().map(Block::getUfsPath).distinct().count()); + + batch = load.getNextBatchBlocks(300); + assertEquals(80, batch.size()); + assertEquals(40, batch.stream().map(Block::getUfsPath).distinct().count()); + + batch = load.getNextBatchBlocks(100); + assertEquals(0, batch.size()); + } + + @Test + public void testIsHealthy() + throws FileDoesNotExistException, AccessControlException, IOException, InvalidPathException { + List<FileInfo> fileInfos = generateRandomFileInfo(100, 5, 64 * 1024 * 1024); + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + when(fileSystemMaster.listStatus(any(), any())).thenReturn(fileInfos); + FileIterable files = new FileIterable(fileSystemMaster, "test", Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob loadJob = + new LoadJob("test", Optional.of("user"), "1", OptionalLong.empty(), false, false, files); + List<Block> batch = loadJob.getNextBatchBlocks(100); + assertTrue(loadJob.isHealthy()); + loadJob.getNextBatchBlocks(100); + assertTrue(loadJob.isHealthy()); + batch.forEach(loadJob::addBlockToRetry); + assertTrue(loadJob.isHealthy()); + batch = loadJob.getNextBatchBlocks(100); + assertTrue(loadJob.isHealthy()); + batch.forEach(loadJob::addBlockToRetry); + assertFalse(loadJob.isHealthy()); + } + + @Test + public void testLoadProgressReport() throws Exception { + List<FileInfo> fileInfos = generateRandomFileInfo(10, 10, 64 * Constants.MB); + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + when(fileSystemMaster.listStatus(any(), any())).thenReturn(fileInfos); + FileIterable files = new FileIterable(fileSystemMaster, "test", Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob job = + spy(new LoadJob("test", Optional.of("user"), "1", OptionalLong.empty(), false, false, + files)); + when(job.getDurationInSec()).thenReturn(0L); + job.setJobState(JobState.RUNNING); + List<Block> blocks = job.getNextBatchBlocks(25); + job.addLoadedBytes(640 * Constants.MB); + String expectedTextReport = "\tSettings:\tbandwidth: unlimited\tverify: false\n" + + "\tJob State: RUNNING\n" + + "\tFiles Processed: 3\n" + + "\tBytes Loaded: 640.00MB out of 1600.00MB\n" + + "\tBlock load failure rate: 0.00%\n" + + "\tFiles Failed: 0\n"; + assertEquals(expectedTextReport, job.getProgress(JobProgressReportFormat.TEXT, false)); + assertEquals(expectedTextReport, job.getProgress(JobProgressReportFormat.TEXT, true)); + String expectedJsonReport = "{\"mVerbose\":false,\"mJobState\":\"RUNNING\"," + + "\"mVerificationEnabled\":false,\"mProcessedFileCount\":3," + + "\"mLoadedByteCount\":671088640,\"mTotalByteCount\":1677721600," + +
"\"mFailurePercentage\":0.0,\"mFailedFileCount\":0}"; + assertEquals(expectedJsonReport, job.getProgress(JobProgressReportFormat.JSON, false)); + job.addBlockFailure(blocks.get(0), "Test error 1", 2); + job.addBlockFailure(blocks.get(4), "Test error 2", 2); + job.addBlockFailure(blocks.get(10), "Test error 3", 2); + job.failJob(new InternalRuntimeException("test")); + String expectedTextReportWithError = "\tSettings:\tbandwidth: unlimited\tverify: false\n" + + "\tJob State: FAILED (alluxio.exception.runtime.InternalRuntimeException: test)\n" + + "\tFiles Processed: 3\n" + + "\tBytes Loaded: 640.00MB out of 1600.00MB\n" + + "\tBlock load failure rate: 12.00%\n" + + "\tFiles Failed: 2\n"; + assertEquals(expectedTextReportWithError, + job.getProgress(JobProgressReportFormat.TEXT, false)); + String textReport = job.getProgress(JobProgressReportFormat.TEXT, true); + assertFalse(textReport.contains("Test error 1")); + assertTrue(textReport.contains("Test error 2")); + assertTrue(textReport.contains("Test error 3")); + String jsonReport = job.getProgress(JobProgressReportFormat.JSON, false); + assertTrue(jsonReport.contains("FAILED")); + assertTrue(jsonReport.contains("mFailureReason")); + assertFalse(jsonReport.contains("Test error 2")); + jsonReport = job.getProgress(JobProgressReportFormat.JSON, true); + assertFalse(jsonReport.contains("Test error 1")); + assertTrue(jsonReport.contains("Test error 2")); + assertTrue(jsonReport.contains("Test error 3")); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadTestUtils.java b/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadTestUtils.java new file mode 100644 index 000000000000..77265a9b76dd --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadTestUtils.java @@ -0,0 +1,120 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.scheduler; + +import alluxio.grpc.Block; +import alluxio.grpc.BlockStatus; +import alluxio.util.CommonUtils; +import alluxio.wire.BlockInfo; +import alluxio.wire.BlockLocation; +import alluxio.wire.FileBlockInfo; +import alluxio.wire.FileInfo; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import io.grpc.Status; + +import java.util.List; +import java.util.Random; +import java.util.stream.LongStream; + +public final class LoadTestUtils { + private LoadTestUtils() {} + + public static List<BlockStatus> generateRandomBlockStatus( + List<Block> blocks, double failureRate) { + ImmutableList.Builder<BlockStatus> blockStatus = ImmutableList.builder(); + for (Block block : blocks) { + if (Math.random() > failureRate) { + blockStatus.add(BlockStatus.newBuilder() + .setBlock(block) + .setCode(Status.OK.getCode().value()) + .build()); + } + else { + blockStatus.add(BlockStatus.newBuilder() + .setBlock(block) + .setCode((int) (Math.random() * 10) + 1) + .setRetryable(Math.random() > 0.5) + .build()); + } + } + return blockStatus.build(); + } + + public static List<FileInfo> fileWithBlockLocations(List<FileInfo> files, double ratio) { + ImmutableList.Builder<FileInfo> newFiles = ImmutableList.builder(); + files.forEach(fileInfo -> { + ImmutableList.Builder<FileBlockInfo> newFileBlockInfo = ImmutableList.builder(); + fileInfo.getFileBlockInfos().forEach(fileBlockInfo -> { + BlockInfo info = new BlockInfo().setBlockId(fileBlockInfo.getBlockInfo().getBlockId()); + if (Math.random() <= ratio) { + info.setLocations(ImmutableList.of(new BlockLocation())); + } + newFileBlockInfo.add(new FileBlockInfo() + .setUfsLocations(fileBlockInfo.getUfsLocations()) + .setOffset(fileBlockInfo.getOffset()) + .setBlockInfo(info)); + }); + newFiles.add(new FileInfo() + .setUfsPath(fileInfo.getUfsPath()) + .setBlockSizeBytes(fileInfo.getBlockSizeBytes()) + .setBlockIds(fileInfo.getBlockIds()) + .setCompleted(true) + .setFileBlockInfos(newFileBlockInfo.build())); + }); + return newFiles.build(); + } + + public static List<FileInfo> generateRandomFileInfo( + int fileCount, int blockCountPerFile, long blockSizeLimit) { + List<FileInfo> fileInfos = Lists.newArrayList(); + for (int i = 0; i < fileCount; i++) { + FileInfo info = createFileInfo(blockCountPerFile, blockSizeLimit); + fileInfos.add(info); + } + return fileInfos; + } + + private static FileInfo createFileInfo(int blockCount, long blockSizeLimit) { + Random random = new Random(); + FileInfo info = new FileInfo(); + String ufs = CommonUtils.randomAlphaNumString(6); + String filePath = CommonUtils.randomAlphaNumString(6); + long blockSize = Math.abs(random.nextLong() % blockSizeLimit); + List<Long> blockIds = LongStream.range(0, blockCount) + .map(i -> random.nextLong()) + .boxed() + .collect(ImmutableList.toImmutableList()); + info.setUfsPath(ufs).setPath(filePath) + .setBlockSizeBytes(blockSize) + .setLength(blockSizeLimit * blockCount) + .setBlockIds(blockIds) + .setFileBlockInfos(blockIds + .stream() + .map(id -> LoadTestUtils.createFileBlockInfo(id, blockSizeLimit)) + .collect(ImmutableList.toImmutableList())) + .setCompleted(true) + .setPersisted(true); + return info; + } + + private static FileBlockInfo createFileBlockInfo(long id, long length) { + FileBlockInfo fileBlockInfo = new FileBlockInfo(); + BlockInfo blockInfo = new BlockInfo(); + blockInfo.setBlockId(id); + blockInfo.setLength(length); + fileBlockInfo.setBlockInfo(blockInfo); + fileBlockInfo.setOffset(new Random().nextInt(1000)); + return fileBlockInfo; + } +} diff --git
a/core/server/master/src/test/java/alluxio/master/file/scheduler/SchedulerTest.java b/core/server/master/src/test/java/alluxio/master/file/scheduler/SchedulerTest.java new file mode 100644 index 000000000000..5c91987b1d5e --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/scheduler/SchedulerTest.java @@ -0,0 +1,541 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.scheduler; + +import static alluxio.master.file.scheduler.LoadTestUtils.fileWithBlockLocations; +import static alluxio.master.file.scheduler.LoadTestUtils.generateRandomBlockStatus; +import static alluxio.master.file.scheduler.LoadTestUtils.generateRandomFileInfo; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.argThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import alluxio.Constants; +import alluxio.client.block.stream.BlockWorkerClient; +import alluxio.client.file.FileSystemContext; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.conf.Source; +import alluxio.exception.AccessControlException; +import alluxio.exception.runtime.ResourceExhaustedRuntimeException; +import alluxio.exception.status.UnavailableException; +import alluxio.grpc.BlockStatus; +import alluxio.grpc.JobProgressReportFormat; +import alluxio.grpc.LoadRequest; +import alluxio.grpc.LoadResponse; +import alluxio.grpc.TaskStatus; +import alluxio.job.JobDescription; +import alluxio.master.file.FileSystemMaster; +import alluxio.master.job.FileIterable; +import alluxio.master.job.LoadJob; +import alluxio.master.journal.JournalContext; +import alluxio.master.scheduler.DefaultWorkerProvider; +import alluxio.master.scheduler.JournaledJobMetaStore; +import alluxio.master.scheduler.Scheduler; +import alluxio.proto.journal.Job; +import alluxio.resource.CloseableResource; +import alluxio.scheduler.job.JobState; +import alluxio.security.authentication.AuthenticatedClientUser; +import alluxio.wire.FileInfo; +import alluxio.wire.WorkerInfo; +import alluxio.wire.WorkerNetAddress; + +import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.SettableFuture; +import io.grpc.Status; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.IntStream; + +public final class SchedulerTest { + + @BeforeClass + public static void before() { + AuthenticatedClientUser.set("user"); + } + + @AfterClass + public static void after() { + 
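// Clear the thread-local test user so authentication state does not leak into other suites. +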
AuthenticatedClientUser.remove(); + } + + @Test + public void testGetActiveWorkers() throws IOException { + FileSystemMaster fsMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + CloseableResource<BlockWorkerClient> blockWorkerClient = mock(CloseableResource.class); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + when(fsMaster.getWorkerInfoList()) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(1).setAddress( + new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))) + .thenThrow(new UnavailableException("test")) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(1).setAddress( + new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))); + when(fileSystemContext.acquireBlockWorkerClient(any())).thenReturn(blockWorkerClient); + assertEquals(0, scheduler + .getActiveWorkers().size()); + scheduler.updateWorkers(); + assertEquals(2, scheduler + .getActiveWorkers().size()); + scheduler.updateWorkers(); + assertEquals(2, scheduler + .getActiveWorkers().size()); + scheduler.updateWorkers(); + assertEquals(1, scheduler + .getActiveWorkers().size()); + scheduler.updateWorkers(); + assertEquals(2, scheduler + .getActiveWorkers().size()); + } + + @Test + public void testSubmit() throws Exception { + String validLoadPath = "/path/to/load"; + FileSystemMaster fsMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + when(fsMaster.createJournalContext()).thenReturn(journalContext); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + FileIterable files = + new FileIterable(fsMaster, validLoadPath, Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob loadJob = + new LoadJob(validLoadPath, Optional.of("user"), "1", OptionalLong.empty(), false, true, + files); + assertTrue(scheduler.submitJob(loadJob)); + verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() + && journalEntry.getLoadJob().getLoadPath().equals(validLoadPath) + && journalEntry.getLoadJob().getState() == Job.PJobState.CREATED + && !journalEntry.getLoadJob().hasBandwidth() + && journalEntry.getLoadJob().getVerify())); + assertEquals(1, scheduler + .getJobs().size()); + LoadJob job = (LoadJob) scheduler.getJobs().get(loadJob.getDescription()); + assertEquals(OptionalLong.empty(), job.getBandwidth()); + assertTrue(job.isVerificationEnabled()); + loadJob = + new LoadJob(validLoadPath, Optional.of("user"), "1", OptionalLong.of(1000), true, false, + files); + assertFalse(scheduler.submitJob(loadJob)); + verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() + && journalEntry.getLoadJob().getLoadPath().equals(validLoadPath) + && journalEntry.getLoadJob().getState() == Job.PJobState.CREATED + && journalEntry.getLoadJob().getBandwidth() == 1000 + &&
!journalEntry.getLoadJob().getPartialListing() // we don't update partialListing + && !journalEntry.getLoadJob().getVerify())); + assertEquals(1, scheduler + .getJobs().size()); + job = (LoadJob) scheduler.getJobs().get(loadJob.getDescription()); + assertEquals(1000, job.getBandwidth().getAsLong()); + assertFalse(job.isVerificationEnabled()); + } + + @Test + public void testStop() throws Exception { + String validLoadPath = "/path/to/load"; + FileSystemMaster fsMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + when(fsMaster.createJournalContext()).thenReturn(journalContext); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + FileIterable files = + new FileIterable(fsMaster, validLoadPath, Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob job = + new LoadJob(validLoadPath, Optional.of("user"), "1", OptionalLong.of(100), false, true, + files); + assertTrue(scheduler.submitJob(job)); + verify(journalContext, times(1)).append(any()); + verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() + && journalEntry.getLoadJob().getLoadPath().equals(validLoadPath) + && journalEntry.getLoadJob().getState() == Job.PJobState.CREATED + && journalEntry.getLoadJob().getBandwidth() == 100 + && journalEntry.getLoadJob().getVerify())); + assertTrue(scheduler.stopJob(job.getDescription())); + verify(journalContext, times(2)).append(any()); + verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() + && journalEntry.getLoadJob().getLoadPath().equals(validLoadPath) + && journalEntry.getLoadJob().getState() == Job.PJobState.STOPPED + && journalEntry.getLoadJob().getBandwidth() == 100 + && journalEntry.getLoadJob().getVerify() + && journalEntry.getLoadJob().hasEndTime())); + assertFalse(scheduler.stopJob(job.getDescription())); + verify(journalContext, times(2)).append(any()); + assertFalse(scheduler.stopJob(JobDescription.newBuilder().setPath("/does/not/exist").build())); + verify(journalContext, times(2)).append(any()); + assertFalse(scheduler.submitJob(job)); + verify(journalContext, times(3)).append(any()); + assertTrue(scheduler.stopJob(job.getDescription())); + verify(journalContext, times(4)).append(any()); + } + + @Test + public void testSubmitExceedsCapacity() throws Exception { + FileSystemMaster fsMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + when(fsMaster.createJournalContext()).thenReturn(journalContext); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + IntStream.range(0, 100).forEach( + i -> { + String path = String.format("/path/to/load/%d", i); + FileIterable files = new FileIterable(fsMaster, path, Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + assertTrue(scheduler.submitJob( + new LoadJob(path, Optional.of("user"), "1", OptionalLong.empty(), false, true, + files))); + }); + FileIterable files = + new FileIterable(fsMaster, "/path/to/load/101", Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + assertThrows(ResourceExhaustedRuntimeException.class, () -> scheduler.submitJob( 
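+ // the scheduler's job table is full at this point, so one more submission must be rejected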
+ new LoadJob("/path/to/load/101", Optional.of("user"), "1", OptionalLong.empty(), false, + true, files))); + } + + @Test + public void testScheduling() throws Exception { + FileSystemMaster fsMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + when(fsMaster.createJournalContext()).thenReturn(journalContext); + CloseableResource blockWorkerClientResource = mock(CloseableResource.class); + BlockWorkerClient blockWorkerClient = mock(BlockWorkerClient.class); + when(fsMaster.getWorkerInfoList()) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(1).setAddress( + new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(1).setAddress( + new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)), + new WorkerInfo().setId(3).setAddress( + new WorkerNetAddress().setHost("worker3").setRpcPort(1234)), + new WorkerInfo().setId(4).setAddress( + new WorkerNetAddress().setHost("worker4").setRpcPort(1234)), + new WorkerInfo().setId(5).setAddress( + new WorkerNetAddress().setHost("worker5").setRpcPort(1234)), + new WorkerInfo().setId(6).setAddress( + new WorkerNetAddress().setHost("worker6").setRpcPort(1234)), + new WorkerInfo().setId(7).setAddress( + new WorkerNetAddress().setHost("worker7").setRpcPort(1234)), + new WorkerInfo().setId(8).setAddress( + new WorkerNetAddress().setHost("worker8").setRpcPort(1234)), + new WorkerInfo().setId(9).setAddress( + new WorkerNetAddress().setHost("worker9").setRpcPort(1234)), + new WorkerInfo().setId(10).setAddress( + new WorkerNetAddress().setHost("worker10").setRpcPort(1234)))); + List fileInfos = generateRandomFileInfo(100, 50, 64 * Constants.MB); + when(fsMaster.listStatus(any(), any())) + .thenReturn(fileInfos) + .thenReturn(fileWithBlockLocations(fileInfos, 0.95)) + .thenReturn(fileWithBlockLocations(fileInfos, 1.1)); + when(fileSystemContext.acquireBlockWorkerClient(any())).thenReturn(blockWorkerClientResource); + when(blockWorkerClientResource.get()).thenReturn(blockWorkerClient); + AtomicInteger iteration = new AtomicInteger(); + when(blockWorkerClient.load(any())).thenAnswer(invocation -> { + LoadRequest request = invocation.getArgument(0); + return buildResponseFuture(request, iteration); + }); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + String path = "test"; + FileIterable files = new FileIterable(fsMaster, path, Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob loadJob = new LoadJob(path, Optional.of("user"), "1", + OptionalLong.of(1000), false, true, files); + scheduler.submitJob(loadJob); + verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() + && journalEntry.getLoadJob().getLoadPath().equals(path) + && journalEntry.getLoadJob().getState() == Job.PJobState.CREATED + && journalEntry.getLoadJob().getBandwidth() == 1000 + && journalEntry.getLoadJob().getVerify())); + + scheduler.start(); + while (!scheduler + 
.getJobProgress(loadJob.getDescription(), JobProgressReportFormat.TEXT, false) + .contains("SUCCEEDED")) { + assertFalse(scheduler.submitJob( + new LoadJob(path, Optional.of("user"), "1", OptionalLong.of(1000), false, true, files))); + Thread.sleep(1000); + } + Thread.sleep(1000); + scheduler.stop(); + assertEquals(JobState.SUCCEEDED, loadJob.getJobState()); + assertEquals(0, loadJob.getCurrentBlockCount()); + verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() + && journalEntry.getLoadJob().getLoadPath().equals(path) + && journalEntry.getLoadJob().getState() == Job.PJobState.SUCCEEDED + && journalEntry.getLoadJob().getBandwidth() == 1000 + && journalEntry.getLoadJob().getVerify())); + assertTrue(scheduler.submitJob(new LoadJob(path, "user", OptionalLong.of(1000), files))); + } + + private ListenableFuture<LoadResponse> buildResponseFuture(LoadRequest request, + AtomicInteger iteration) { + int failureRequestIteration = 50; + int exceptionRequestIteration = 70; + + iteration.getAndIncrement(); + List<BlockStatus> status; + if (iteration.get() == exceptionRequestIteration) { + // Test worker exception + SettableFuture<LoadResponse> responseFuture = SettableFuture.create(); + responseFuture.setException(new TimeoutException()); + return responseFuture; + } + else if (iteration.get() == failureRequestIteration) { + // Test worker failing the whole request + status = generateRandomBlockStatus(request.getBlocksList(), 1); + } + else { + status = generateRandomBlockStatus(request.getBlocksList(), 0.01); + } + LoadResponse.Builder response = LoadResponse.newBuilder(); + if (status.stream().allMatch(s -> s.getCode() == Status.OK.getCode().value())) { + response.setStatus(TaskStatus.SUCCESS); + } + else if (status.stream().allMatch(s -> s.getCode() != Status.OK.getCode().value())) { + response.setStatus(TaskStatus.FAILURE) + .addAllBlockStatus(status); + } + else { + response.setStatus(TaskStatus.PARTIAL_FAILURE) + .addAllBlockStatus(status.stream() + .filter(s -> s.getCode() != Status.OK.getCode().value()) + .collect(ImmutableList.toImmutableList())); + } + SettableFuture<LoadResponse> responseFuture = SettableFuture.create(); + responseFuture.set(response.build()); + return responseFuture; + } + + @Test + public void testSchedulingFullCapacity() throws Exception { + FileSystemMaster fsMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + when(fsMaster.createJournalContext()).thenReturn(journalContext); + CloseableResource<BlockWorkerClient> blockWorkerClientResource = mock(CloseableResource.class); + BlockWorkerClient blockWorkerClient = mock(BlockWorkerClient.class); + ImmutableList.Builder<WorkerInfo> workerInfos = ImmutableList.builder(); + for (int i = 0; i < 1000; i++) { + workerInfos.add(new WorkerInfo().setId(i).setAddress( + new WorkerNetAddress().setHost("worker" + i).setRpcPort(1234))); + } + when(fsMaster.getWorkerInfoList()) + .thenReturn(workerInfos.build()); + List<FileInfo> fileInfos = generateRandomFileInfo(2000, 50, 64 * Constants.MB); + when(fsMaster.listStatus(any(), any())) + .thenReturn(fileInfos); + + when(fileSystemContext.acquireBlockWorkerClient(any())).thenReturn(blockWorkerClientResource); + when(blockWorkerClientResource.get()).thenReturn(blockWorkerClient); + when(blockWorkerClient.load(any())).thenAnswer(invocation -> { + LoadResponse.Builder response = LoadResponse.newBuilder().setStatus(TaskStatus.SUCCESS); + SettableFuture<LoadResponse> responseFuture = SettableFuture.create(); + return
responseFuture; + }); + FileIterable files = + new FileIterable(fsMaster, "test", Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + for (int i = 0; i < 100; i++) { + LoadJob loadJob = new LoadJob("test" + i, "user", OptionalLong.of(1000), files); + scheduler.submitJob(loadJob); + } + assertThrows(ResourceExhaustedRuntimeException.class, () -> scheduler.submitJob( + new LoadJob("/way/too/many", "user", OptionalLong.empty(), files))); + scheduler.start(); + while (scheduler + .getJobs().values().stream() + .anyMatch(loadJob -> loadJob.getJobState() != JobState.SUCCEEDED)) { + Thread.sleep(1000); + } + scheduler.stop(); + } + + @Test + public void testSchedulingWithException() throws Exception { + FileSystemMaster fsMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + when(fsMaster.createJournalContext()).thenReturn(journalContext); + CloseableResource<BlockWorkerClient> blockWorkerClientResource = mock(CloseableResource.class); + BlockWorkerClient blockWorkerClient = mock(BlockWorkerClient.class); + when(fsMaster.getWorkerInfoList()) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(1).setAddress( + new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))); + when(fileSystemContext.acquireBlockWorkerClient(any())).thenReturn(blockWorkerClientResource); + when(blockWorkerClientResource.get()).thenReturn(blockWorkerClient); + List<FileInfo> fileInfos = generateRandomFileInfo(100, 10, 64 * Constants.MB); + when(fsMaster.listStatus(any(), any())) + // Non-retryable exception, first load job should fail + .thenThrow(AccessControlException.class) + // Retryable exception, second load job should succeed + .thenThrow(new ResourceExhaustedRuntimeException("test", true)) + .thenReturn(fileInfos); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + scheduler.start(); + FileIterable files = + new FileIterable(fsMaster, "test", Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob job = new LoadJob("test", "user", OptionalLong.of(1000), files); + scheduler.submitJob(job); + while (!scheduler + .getJobProgress(job.getDescription(), JobProgressReportFormat.TEXT, false) + .contains("FAILED")) { + Thread.sleep(1000); + } + when(blockWorkerClient.load(any())).thenAnswer(invocation -> { + LoadResponse.Builder response = LoadResponse.newBuilder().setStatus(TaskStatus.SUCCESS); + SettableFuture<LoadResponse> responseFuture = SettableFuture.create(); + responseFuture.set(response.build()); + return responseFuture; + }); + job = new LoadJob("test", "user", OptionalLong.of(1000), files); + scheduler.submitJob(job); + while (!scheduler + .getJobProgress(job.getDescription(), JobProgressReportFormat.TEXT, false) + .contains("SUCCEEDED")) { + Thread.sleep(1000); + } + scheduler.stop(); + } + + @Test + public void testJobRetention() throws Exception { + Configuration.modifiableGlobal().set(PropertyKey.JOB_RETENTION_TIME, "0ms", Source.RUNTIME); + FileSystemMaster fsMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class);
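+ // with JOB_RETENTION_TIME set to 0ms, any job that reaches a terminal state is immediately stale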
+ JournalContext journalContext = mock(JournalContext.class); + when(fsMaster.createJournalContext()).thenReturn(journalContext); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + scheduler.start(); + IntStream + .range(0, 5) + .forEach(i -> { + String path = String.format("/load/%d", i); + FileIterable files = new FileIterable(fsMaster, path, Optional.of("user"), + false, LoadJob.QUALIFIED_FILE_FILTER); + assertTrue(scheduler.submitJob( + new LoadJob(path, Optional.of("user"), "1", + OptionalLong.empty(), false, true, files))); + }); + assertEquals(5, scheduler + .getJobs().size()); + scheduler + .getJobs() + .get(JobDescription + .newBuilder() + .setPath("/load/1") + .setType("load") + .build()) + .setJobState(JobState.VERIFYING); + scheduler + .getJobs() + .get(JobDescription + .newBuilder() + .setPath("/load/2") + .setType("load") + .build()) + .setJobState(JobState.FAILED); + scheduler + .getJobs() + .get(JobDescription + .newBuilder() + .setPath("/load/3") + .setType("load") + .build()) + .setJobState(JobState.SUCCEEDED); + scheduler + .getJobs() + .get(JobDescription + .newBuilder() + .setPath("/load/4") + .setType("load") + .build()) + .setJobState(JobState.STOPPED); + scheduler.cleanupStaleJob(); + assertEquals(2, scheduler + .getJobs().size()); + assertTrue(scheduler + .getJobs().containsKey(JobDescription + .newBuilder() + .setPath("/load/0") + .setType("load") + .build())); + assertTrue(scheduler + .getJobs().containsKey(JobDescription + .newBuilder() + .setPath("/load/1") + .setType("load") + .build())); + IntStream.range(2, 5).forEach( + i -> assertFalse(scheduler + .getJobs().containsKey(JobDescription + .newBuilder() + .setPath("/load/" + i) + .setType("load") + .build()))); + Configuration.modifiableGlobal().unset(PropertyKey.JOB_RETENTION_TIME); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/journal/JournalContextTest.java b/core/server/master/src/test/java/alluxio/master/journal/JournalContextTest.java index a89af6602283..9d943b990e31 100644 --- a/core/server/master/src/test/java/alluxio/master/journal/JournalContextTest.java +++ b/core/server/master/src/test/java/alluxio/master/journal/JournalContextTest.java @@ -11,6 +11,7 @@ package alluxio.master.journal; +import static alluxio.master.journal.JournalTestUtils.createEmbeddedJournalTestPorts; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -88,6 +89,7 @@ public JournalContextTest(String journalType) { @Before public void before() throws Exception { Configuration.set(PropertyKey.MASTER_JOURNAL_TYPE, mJournalType); + createEmbeddedJournalTestPorts(1); mRegistry = new MasterRegistry(); mJournalSystem = JournalTestUtils.createJournalSystem(mTemporaryFolder); diff --git a/core/server/master/src/test/java/alluxio/master/journal/JournalTestUtils.java b/core/server/master/src/test/java/alluxio/master/journal/JournalTestUtils.java index 8cadd7e84a7d..a4f29bd7f8da 100644 --- a/core/server/master/src/test/java/alluxio/master/journal/JournalTestUtils.java +++ b/core/server/master/src/test/java/alluxio/master/journal/JournalTestUtils.java @@ -11,6 +11,9 @@ package alluxio.master.journal; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.master.PortRegistry; import alluxio.util.CommonUtils.ProcessType; import org.junit.rules.TemporaryFolder; @@ 
-18,12 +21,31 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; /** * Utility methods for testing against a journal system. */ public class JournalTestUtils { + public static List<Integer> createEmbeddedJournalTestPorts(int count) throws IOException { + List<Integer> ports = new ArrayList<>(); + StringBuilder addresses = new StringBuilder(); + for (int i = 0; i < count; i++) { + if (i != 0) { + addresses.append(","); + } + int port = PortRegistry.getFreePort(); + ports.add(port); + addresses.append(String.format("localhost:%d", port)); + } + Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_ADDRESSES, addresses.toString()); + Configuration.set(PropertyKey.MASTER_HOSTNAME, "localhost"); + Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_PORT, ports.get(0)); + return ports; + } + public static JournalSystem createJournalSystem(TemporaryFolder folder) { try { return createJournalSystem(folder.newFolder("journal").getAbsolutePath()); diff --git a/core/server/master/src/test/java/alluxio/master/journal/NoopRaftJournalSystem.java b/core/server/master/src/test/java/alluxio/master/journal/NoopRaftJournalSystem.java new file mode 100644 index 000000000000..4daaedd72b66 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/journal/NoopRaftJournalSystem.java @@ -0,0 +1,77 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal; + +import alluxio.master.Master; +import alluxio.master.journal.noop.NoopJournal; +import alluxio.master.journal.raft.RaftJournalSystem; +import alluxio.util.network.NetworkAddressUtils; + +import java.net.URI; +import java.net.URISyntaxException; + +/** + * A noop raft journal system for testing. + */ +public class NoopRaftJournalSystem extends RaftJournalSystem { + private boolean mIsLeader = false; + + /** + * Creates a raft journal system object. + * @throws URISyntaxException + */ + public NoopRaftJournalSystem() throws URISyntaxException { + super(new URI(""), NetworkAddressUtils.ServiceType.MASTER_RAFT); + } + + /** + * Sets the raft journal state.
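+ * Tests flip this flag to simulate gaining or losing leadership without a running quorum.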
+ * @param isLeader if the raft journal system should be a leader + */ + public synchronized void setIsLeader(boolean isLeader) { + mIsLeader = isLeader; + } + + @Override + public synchronized void start() { + } + + @Override + public synchronized void stop() { + } + + @Override + public synchronized boolean isLeader() { + return mIsLeader; + } + + @Override + public synchronized void startInternal() { + } + + @Override + public synchronized void stopInternal() { + } + + @Override + public synchronized void gainPrimacy() { + } + + @Override + public synchronized void losePrimacy() { + } + + @Override + public synchronized Journal createJournal(Master master) { + return new NoopJournal(); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalSystemMetricsTest.java b/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalSystemMetricsTest.java new file mode 100644 index 000000000000..d0e38fe72ba5 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalSystemMetricsTest.java @@ -0,0 +1,191 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal.raft; + +import static alluxio.master.journal.JournalTestUtils.createEmbeddedJournalTestPorts; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import alluxio.conf.Configuration; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.util.network.NetworkAddressUtils.ServiceType; + +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.junit.After; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.mockito.Mockito; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Unit tests for {@link RaftJournalSystem}'s metrics.
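+ * The role-related gauges are driven by stubbing the role info that the journal system reports.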
+ */ +public final class RaftJournalSystemMetricsTest { + + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + @After + public void after() { + Configuration.reloadProperties(); + } + + private void resetMetrics() { + MetricsSystem.resetAllMetrics(); + MetricsSystem.METRIC_REGISTRY.remove(MetricKey.CLUSTER_LEADER_INDEX.getName()); + MetricsSystem.METRIC_REGISTRY.remove(MetricKey.MASTER_ROLE_ID.getName()); + MetricsSystem.METRIC_REGISTRY.remove(MetricKey.CLUSTER_LEADER_ID.getName()); + } + + @Test + public void journalStateMachineMetrics() throws Exception { + resetMetrics(); + createEmbeddedJournalTestPorts(3); + RaftJournalSystem system = + new RaftJournalSystem(mFolder.newFolder().toURI(), ServiceType.MASTER_RAFT); + String[] metricsNames = new String[] { + MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_LAST_INDEX.getName(), + MetricKey.MASTER_JOURNAL_ENTRIES_SINCE_CHECKPOINT.getName(), + MetricKey.MASTER_JOURNAL_LAST_CHECKPOINT_TIME.getName(), + MetricKey.MASTER_JOURNAL_LAST_APPLIED_COMMIT_INDEX.getName(), + MetricKey.MASTER_JOURNAL_CHECKPOINT_WARN.getName(), + }; + JournalStateMachine stateMachine = new JournalStateMachine(system.getJournals(), system, + new SnapshotDirStateMachineStorage()); + for (String name : metricsNames) { + assertNotNull(MetricsSystem.METRIC_REGISTRY.getGauges().get(name)); + } + stateMachine.close(); + for (String name : metricsNames) { + assertNull(MetricsSystem.METRIC_REGISTRY.getGauges().get(name)); + } + JournalStateMachine newStateMachine = new JournalStateMachine(system.getJournals(), system, + new SnapshotDirStateMachineStorage()); + for (String name : metricsNames) { + assertNotNull(MetricsSystem.METRIC_REGISTRY.getGauges().get(name)); + } + newStateMachine.close(); + for (String name : metricsNames) { + assertNull(MetricsSystem.METRIC_REGISTRY.getGauges().get(name)); + } + } + + @Test + public void metrics() throws Exception { + resetMetrics(); + List<Integer> ports = createEmbeddedJournalTestPorts(3); + + RaftJournalSystem raftJournalSystem = + new RaftJournalSystem(mFolder.newFolder().toURI(), ServiceType.MASTER_RAFT); + RaftJournalSystem system = Mockito.spy(raftJournalSystem); + RaftProtos.RoleInfoProto leaderInfo = RaftProtos.RoleInfoProto.newBuilder() + .setRole(RaftProtos.RaftPeerRole.LEADER).build(); + RaftProtos.RoleInfoProto followerInfo = RaftProtos.RoleInfoProto.newBuilder() + .setRole(RaftProtos.RaftPeerRole.FOLLOWER) + .setFollowerInfo(RaftProtos.FollowerInfoProto.newBuilder() + .setLeaderInfo(RaftProtos.ServerRpcProto.newBuilder() + .setId(RaftProtos.RaftPeerProto.newBuilder() + .setId(ByteString.copyFromUtf8(String.format("localhost_%d", ports.get(1))))))) + .build(); + + Map<String, Long> sn1 = new HashMap<String, Long>() { + { + put("foo", 1L); + } + }; + Mockito.doReturn(sn1).when(system).getCurrentSequenceNumbers(); + system.startInternal(); + Mockito.doReturn(null).when(system).getRaftRoleInfo(); + assertEquals(-1, getClusterLeaderIndex()); + assertEquals(-1, getMasterRoleId()); + assertEquals("WAITING_FOR_ELECTION", getClusterLeaderId()); + assertEquals(sn1, getMasterJournalSequenceNumbers(system)); + + Map<String, Long> sn2 = new HashMap<String, Long>() { + { + put("foo", 1L); + put("bar", 2L); + } + }; + Mockito.doReturn(sn2).when(system).getCurrentSequenceNumbers(); + system.gainPrimacy(); + Mockito.doReturn(leaderInfo).when(system).getRaftRoleInfo(); + assertEquals(0, getClusterLeaderIndex()); + assertEquals(RaftProtos.RaftPeerRole.LEADER_VALUE, getMasterRoleId()); + assertEquals(system.getLocalPeerId().toString(), getClusterLeaderId()); + assertEquals(sn2,
getMasterJournalSequenceNumbers(system)); + + Map<String, Long> sn3 = new HashMap<String, Long>() { + { + put("foo", 1L); + put("bar", 2L); + put("baz", 3L); + } + }; + Mockito.doReturn(sn3).when(system).getCurrentSequenceNumbers(); + system.losePrimacy(); + Mockito.doReturn(followerInfo).when(system).getRaftRoleInfo(); + assertEquals(1, getClusterLeaderIndex()); + assertEquals(RaftProtos.RaftPeerRole.FOLLOWER_VALUE, getMasterRoleId()); + assertEquals(String.format("localhost_%d", ports.get(1)), getClusterLeaderId()); + assertEquals(sn3, getMasterJournalSequenceNumbers(system)); + + Map<String, Long> sn4 = new HashMap<String, Long>() { + { + put("foo", 1L); + put("bar", 2L); + put("baz", 3L); + put("qux", 4L); + } + }; + Mockito.doReturn(sn4).when(system).getCurrentSequenceNumbers(); + system.gainPrimacy(); + Mockito.doReturn(leaderInfo).when(system).getRaftRoleInfo(); + assertEquals(0, getClusterLeaderIndex()); + assertEquals(RaftProtos.RaftPeerRole.LEADER_VALUE, getMasterRoleId()); + assertEquals(system.getLocalPeerId().toString(), getClusterLeaderId()); + assertEquals(sn4, getMasterJournalSequenceNumbers(system)); + } + + private static int getClusterLeaderIndex() { + return (int) MetricsSystem.METRIC_REGISTRY.getGauges() + .get(MetricKey.CLUSTER_LEADER_INDEX.getName()).getValue(); + } + + private static int getMasterRoleId() { + return (int) MetricsSystem.METRIC_REGISTRY.getGauges() + .get(MetricKey.MASTER_ROLE_ID.getName()).getValue(); + } + + private static String getClusterLeaderId() { + return (String) MetricsSystem.METRIC_REGISTRY.getGauges() + .get(MetricKey.CLUSTER_LEADER_ID.getName()).getValue(); + } + + private static Map<String, Long> getMasterJournalSequenceNumbers(RaftJournalSystem system) { + Map<String, Long> sequenceNumber = system.getCurrentSequenceNumbers(); + Map<String, Long> result = new HashMap<String, Long>(); + for (String masterName : sequenceNumber.keySet()) { + long value = (long) MetricsSystem.METRIC_REGISTRY.getGauges() + .get(MetricKey.MASTER_JOURNAL_SEQUENCE_NUMBER.getName() + "." + masterName).getValue(); + result.put(masterName, value); + } + return result; + } +} diff --git a/core/server/master/src/test/java/alluxio/master/journal/raft/SnapshotReplicationManagerTest.java b/core/server/master/src/test/java/alluxio/master/journal/raft/SnapshotReplicationManagerTest.java deleted file mode 100644 index 02a90fec31cc..000000000000 --- a/core/server/master/src/test/java/alluxio/master/journal/raft/SnapshotReplicationManagerTest.java +++ /dev/null @@ -1,484 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership.
- */ - -package alluxio.master.journal.raft; - -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.argThat; - -import alluxio.ConfigurationRule; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.grpc.JournalQueryRequest; -import alluxio.grpc.NetAddress; -import alluxio.grpc.QuorumServerInfo; -import alluxio.grpc.RaftJournalServiceGrpc; -import alluxio.grpc.SnapshotData; -import alluxio.grpc.UploadSnapshotPRequest; -import alluxio.grpc.UploadSnapshotPResponse; -import alluxio.util.CommonUtils; -import alluxio.util.WaitForOptions; -import alluxio.util.io.BufferUtils; - -import io.grpc.ManagedChannel; -import io.grpc.Server; -import io.grpc.Status; -import io.grpc.StatusRuntimeException; -import io.grpc.inprocess.InProcessChannelBuilder; -import io.grpc.inprocess.InProcessServerBuilder; -import io.grpc.stub.StreamObserver; -import net.bytebuddy.utility.RandomString; -import org.apache.commons.io.FileUtils; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientReply; -import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.server.RaftServerConfigKeys; -import org.apache.ratis.server.protocol.TermIndex; -import org.apache.ratis.server.storage.RaftStorage; -import org.apache.ratis.server.storage.StorageImplUtils; -import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; -import org.junit.After; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.mockito.Mockito; -import org.mockito.stubbing.Answer; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionException; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; - -public class SnapshotReplicationManagerTest { - private static final int SNAPSHOT_SIZE = 100_000; - private static final int DEFAULT_SNAPSHOT_TERM = 0; - private static final int DEFAULT_SNAPSHOT_INDEX = 1; - - @Rule - public TemporaryFolder mFolder = new TemporaryFolder(); - - @Rule - public ConfigurationRule mConfigurationRule = - new ConfigurationRule(PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_CHUNK_SIZE, - "32KB", Configuration.modifiableGlobal()); - - private final WaitForOptions mWaitOptions = WaitForOptions.defaults().setTimeoutMs(30_000); - private SnapshotReplicationManager mLeaderSnapshotManager; - private RaftJournalSystem mLeader; - private SimpleStateMachineStorage mLeaderStore; - private final Map mFollowers = new HashMap<>(); - - private RaftJournalServiceClient mClient; - private Server mServer; - - private void before(int numFollowers) throws Exception { - Configuration.set(PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT, 550); - Configuration.set(PropertyKey.MASTER_JOURNAL_REQUEST_DATA_TIMEOUT, 550); - mLeader = Mockito.mock(RaftJournalSystem.class); - Mockito.when(mLeader.isLeader()).thenReturn(true); - Mockito.when(mLeader.getLocalPeerId()).thenReturn(RaftPeerId.getRaftPeerId("leader")); - mLeaderStore = getSimpleStateMachineStorage(); - mLeaderSnapshotManager = Mockito.spy(new SnapshotReplicationManager(mLeader, mLeaderStore)); - - String serverName = InProcessServerBuilder.generateName(); 
- mServer = InProcessServerBuilder.forName(serverName) - .directExecutor() - .addService(new RaftJournalServiceHandler(mLeaderSnapshotManager)).build(); - mServer.start(); - ManagedChannel channel = InProcessChannelBuilder.forName(serverName).directExecutor().build(); - RaftJournalServiceGrpc.RaftJournalServiceStub stub = RaftJournalServiceGrpc.newStub(channel); - // mock RaftJournalServiceClient - mClient = Mockito.mock(RaftJournalServiceClient.class); - Mockito.doNothing().when(mClient).close(); - // download rpc mock - Mockito.when(mClient.downloadSnapshot(any())).thenAnswer((args) -> { - StreamObserver responseObserver = args.getArgument(0, StreamObserver.class); - return stub.downloadSnapshot(responseObserver); - }); - // upload rpc mock - Mockito.when(mClient.uploadSnapshot(any())).thenAnswer((args) -> { - StreamObserver responseObserver = args.getArgument(0, StreamObserver.class); - return stub.uploadSnapshot(responseObserver); - }); - Mockito.doReturn(mClient).when(mLeaderSnapshotManager).createJournalServiceClient(); - - for (int i = 0; i < numFollowers; i++) { - Follower follower = new Follower(mClient); - mFollowers.put(follower.getRaftPeerId(), follower); - } - - List quorumServerInfos = mFollowers.values().stream().map(follower -> { - return QuorumServerInfo.newBuilder().setServerAddress( - NetAddress.newBuilder().setHost(follower.mHost).setRpcPort(follower.mRpcPort)).build(); - }).collect(Collectors.toList()); - - Mockito.when(mLeader.getQuorumServerInfoList()).thenReturn(quorumServerInfos); - Answer fn = (args) -> { - RaftPeerId peerId = args.getArgument(0, RaftPeerId.class); - Message message = args.getArgument(1, Message.class); - JournalQueryRequest queryRequest = JournalQueryRequest.parseFrom( - message.getContent().asReadOnlyByteBuffer()); - return CompletableFuture.supplyAsync(() -> { - CompletableFuture fut = CompletableFuture.supplyAsync(() -> { - Message response; - try { - response = mFollowers.get(peerId).mSnapshotManager.handleRequest(queryRequest); - } catch (IOException e) { - throw new CompletionException(e); - } - RaftClientReply reply = Mockito.mock(RaftClientReply.class); - Mockito.when(reply.getMessage()).thenReturn(response); - return reply; - }); - RaftClientReply result; - try { - if (args.getArguments().length == 3) { - result = fut.get(args.getArgument(2), TimeUnit.MILLISECONDS); - } else { - result = fut.get(); - } - return result; - } catch (Exception e) { - throw new CompletionException(e); - } - }); - }; - Mockito.when(mLeader.sendMessageAsync(any(), any())).thenAnswer(fn); - Mockito.when(mLeader.sendMessageAsync(any(), any(), anyLong())).thenAnswer(fn); - } - - private SimpleStateMachineStorage getSimpleStateMachineStorage() throws IOException { - RaftStorage rs = StorageImplUtils.newRaftStorage( - mFolder.newFolder(CommonUtils.randomAlphaNumString(6)), - RaftServerConfigKeys.Log.CorruptionPolicy.getDefault(), - RaftStorage.StartupOption.FORMAT, - RaftServerConfigKeys.STORAGE_FREE_SPACE_MIN_DEFAULT.getSize()); - rs.initialize(); - SimpleStateMachineStorage snapshotStore = new SimpleStateMachineStorage(); - snapshotStore.init(rs); - return snapshotStore; - } - - private void createSnapshotFile(SimpleStateMachineStorage storage) throws IOException { - createSnapshotFile(storage, DEFAULT_SNAPSHOT_TERM, DEFAULT_SNAPSHOT_INDEX); - } - - private void createSnapshotFile(SimpleStateMachineStorage storage, long term, long index) - throws IOException { - java.io.File file = storage.getSnapshotFile(term, index); - FileUtils.writeByteArrayToFile(file, 
BufferUtils.getIncreasingByteArray(SNAPSHOT_SIZE)); - storage.loadLatestSnapshot(); - } - - private void validateSnapshotFile(SimpleStateMachineStorage storage) throws IOException { - validateSnapshotFile(storage, DEFAULT_SNAPSHOT_TERM, DEFAULT_SNAPSHOT_INDEX); - } - - private void validateSnapshotFile(SimpleStateMachineStorage storage, long term, long index) - throws IOException { - SingleFileSnapshotInfo snapshot = storage.getLatestSnapshot(); - Assert.assertNotNull(snapshot); - Assert.assertEquals(TermIndex.valueOf(term, index), snapshot.getTermIndex()); - byte[] received = FileUtils.readFileToByteArray(snapshot.getFiles().get(0).getPath().toFile()); - Assert.assertTrue(BufferUtils.equalIncreasingByteArray(SNAPSHOT_SIZE, received)); - } - - @After - public void After() throws Exception { - mServer.shutdown(); - mServer.awaitTermination(); - } - - @Test - public void copySnapshotToLeader() throws Exception { - before(1); - Follower follower = mFollowers.values().stream().findFirst().get(); - createSnapshotFile(follower.mStore); - - Assert.assertNull(mLeaderStore.getLatestSnapshot()); - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - validateSnapshotFile(mLeaderStore); - } - - @Test - public void copySnapshotToFollower() throws Exception { - before(1); - createSnapshotFile(mLeaderStore); - - Follower follower = mFollowers.values().stream().findFirst().get(); - Assert.assertNull(follower.mStore.getLatestSnapshot()); - - follower.mSnapshotManager.installSnapshotFromLeader(); - - CommonUtils.waitFor("follower snapshot to complete", - () -> follower.mStore.getLatestSnapshot() != null, mWaitOptions); - validateSnapshotFile(follower.mStore); - } - - @Test - public void requestSnapshotEqualTermHigherIndex() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore); // create default 0, 1 snapshot - createSnapshotFile(secondFollower.mStore, 0, 2); // preferable to the default 0, 1 snapshot - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - // verify that the leader still requests and gets the best snapshot - validateSnapshotFile(mLeaderStore, 0, 2); - } - - @Test - public void failGetInfoEqualTermHigherIndex() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore); // create default 0, 1 snapshot - createSnapshotFile(secondFollower.mStore, 0, 2); // preferable to the default 0, 1 snapshot - // the second follower will not reply to the getInfo request, so the leader will request from - // the first after a timeout - secondFollower.disableGetInfo(); - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - // verify that the leader still requests and get the snapshot from the first follower - validateSnapshotFile(mLeaderStore, 0, 1); - } - - @Test - public void failSnapshotRequestEqualTermHigherIndex() throws Exception { - 
before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore); // create default 0, 1 snapshot - createSnapshotFile(secondFollower.mStore, 0, 2); // preferable to the default 0, 1 snapshot - // the second follower will not start the snapshot upload, so the leader will request from the - // first after a timeout - secondFollower.disableFollowerUpload(); - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - // verify that the leader still requests and get the snapshot from the first follower - validateSnapshotFile(mLeaderStore, 0, 1); - } - - @Test - public void failFailThenSuccess() throws Exception { - before(3); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore, 0, 1); - createSnapshotFile(secondFollower.mStore, 0, 1); - - firstFollower.disableFollowerUpload(); - secondFollower.disableGetInfo(); - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - try { - CommonUtils.waitForResult("upload failure", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower(), - (num) -> num == 1, - WaitForOptions.defaults().setInterval(10).setTimeoutMs(100)); - } catch (Exception e) { - // expected to fail: no snapshot could be uploaded - } - - Follower thirdFollower = followers.get(2); - createSnapshotFile(thirdFollower.mStore, 0, 2); - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - CommonUtils.waitForResult("upload failure", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower(), - (num) -> num == 2, mWaitOptions); - validateSnapshotFile(mLeaderStore, 0, 2); - } - - @Test - public void requestSnapshotHigherTermLowerIndex() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore, 1, 10); - createSnapshotFile(secondFollower.mStore, 2, 1); - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - // verify that the leader still requests and gets the best snapshot - validateSnapshotFile(mLeaderStore, 2, 1); - } - - @Test - public void installSnapshotsInSuccession() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore); // create default 0, 1 snapshot - - for (int i = 2; i < 12; i++) { - if (i % 2 == 0) { - createSnapshotFile(secondFollower.mStore, 0, i); - secondFollower.notifySnapshotInstalled(); - } else { - createSnapshotFile(firstFollower.mStore, 0, i); - firstFollower.notifySnapshotInstalled(); - } - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - validateSnapshotFile(mLeaderStore, 0, i); - } - } - - /** - * Simulates a {@link SnapshotDownloader} error. 
- */ - @Test - public void downloadFailure() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore); // create default 0, 1 snapshot - createSnapshotFile(secondFollower.mStore, 0, 2); // preferable to the default 0, 1 snapshot - - // make sure to error out when requesting the better snapshot from secondFollower - Mockito.doAnswer(mock -> { - SingleFileSnapshotInfo snapshot = secondFollower.mStore.getLatestSnapshot(); - StreamObserver responseObserver = - SnapshotUploader.forFollower(secondFollower.mStore, snapshot); - StreamObserver requestObserver = mClient - .uploadSnapshot(responseObserver); - requestObserver.onError(new IOException("failed snapshot upload")); - return null; - }).when(secondFollower.mSnapshotManager).sendSnapshotToLeader(); - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - // verify that the leader still requests and gets second best snapshot - validateSnapshotFile(mLeaderStore); - } - - /** - * Simulates a {@link SnapshotUploader} error. - */ - @Test - public void uploadFailure() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore); // create default 0, 1 snapshot - createSnapshotFile(secondFollower.mStore, 0, 2); // preferable to the default 0, 1 snapshot - - // make sure to error out when requesting the better snapshot from secondFollower - Mockito.doAnswer(mock -> { - SingleFileSnapshotInfo snapshot = secondFollower.mStore.getLatestSnapshot(); - StreamObserver responseObserver = - SnapshotUploader.forFollower(secondFollower.mStore, snapshot); - StreamObserver requestObserver = mClient - .uploadSnapshot(responseObserver); - responseObserver.onError(new StatusRuntimeException(Status.UNAVAILABLE)); - requestObserver.onNext(UploadSnapshotPRequest.newBuilder() - .setData(SnapshotData.newBuilder() - .setSnapshotTerm(snapshot.getTerm()) - .setSnapshotIndex(snapshot.getIndex()) - .setOffset(0)) - .build()); - return null; - }).when(secondFollower.mSnapshotManager).sendSnapshotToLeader(); - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - // verify that the leader still requests and gets second best snapshot - validateSnapshotFile(mLeaderStore); - } - - private class Follower { - final String mHost; - final int mRpcPort; - final SnapshotReplicationManager mSnapshotManager; - RaftJournalSystem mJournalSystem; - SimpleStateMachineStorage mStore; - - Follower(RaftJournalServiceClient client) throws IOException { - mHost = String.format("follower-%s", RandomString.make()); - mRpcPort = ThreadLocalRandom.current().nextInt(10_000, 99_999); - mStore = getSimpleStateMachineStorage(); - mJournalSystem = Mockito.mock(RaftJournalSystem.class); - mSnapshotManager = Mockito.spy(new SnapshotReplicationManager(mJournalSystem, mStore)); - Mockito.doReturn(client).when(mSnapshotManager).createJournalServiceClient(); - } - - void notifySnapshotInstalled() { - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } - } - - void 
disableFollowerUpload() throws IOException { - Mockito.doNothing().when(mSnapshotManager).sendSnapshotToLeader(); - } - - void disableGetInfo() throws IOException { - Mockito.doAnswer((args) -> { - synchronized (mSnapshotManager) { - // we sleep so nothing is returned - mSnapshotManager.wait(Configuration.global().getMs( - PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT)); - } - throw new IOException("get info disabled"); - }).when(mSnapshotManager) - .handleRequest(argThat(JournalQueryRequest::hasSnapshotInfoRequest)); - } - - RaftPeerId getRaftPeerId() { - return RaftPeerId.valueOf(String.format("%s_%d", mHost, mRpcPort)); - } - } -} diff --git a/core/server/master/src/test/java/alluxio/master/meta/AlluxioMasterRestServiceHandlerTest.java b/core/server/master/src/test/java/alluxio/master/meta/AlluxioMasterRestServiceHandlerTest.java index b49adb4ee03e..61a184cc2072 100644 --- a/core/server/master/src/test/java/alluxio/master/meta/AlluxioMasterRestServiceHandlerTest.java +++ b/core/server/master/src/test/java/alluxio/master/meta/AlluxioMasterRestServiceHandlerTest.java @@ -24,12 +24,14 @@ import alluxio.AlluxioURI; import alluxio.ConfigurationRule; import alluxio.Constants; +import alluxio.DefaultStorageTierAssoc; import alluxio.RuntimeConstants; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.StorageList; import alluxio.master.AlluxioMasterProcess; +import alluxio.master.AlwaysPrimaryPrimarySelector; import alluxio.master.CoreMasterContext; import alluxio.master.MasterProcess; import alluxio.master.MasterRegistry; @@ -38,6 +40,7 @@ import alluxio.master.block.BlockMasterFactory; import alluxio.master.file.FileSystemMaster; import alluxio.master.file.FileSystemMasterFactory; +import alluxio.master.journal.noop.NoopJournalSystem; import alluxio.master.metrics.MetricsMaster; import alluxio.master.metrics.MetricsMasterFactory; import alluxio.metrics.MetricKey; @@ -46,9 +49,11 @@ import alluxio.underfs.UnderFileSystem; import alluxio.underfs.UnderFileSystemFactory; import alluxio.underfs.UnderFileSystemFactoryRegistry; +import alluxio.util.webui.UIFileInfo; import alluxio.web.MasterWebServer; import alluxio.wire.AlluxioMasterInfo; import alluxio.wire.Capacity; +import alluxio.wire.MasterWebUILogs; import alluxio.wire.MountPointInfo; import alluxio.wire.WorkerInfo; import alluxio.wire.WorkerNetAddress; @@ -58,6 +63,7 @@ import com.codahale.metrics.MetricSet; import com.google.common.collect.ImmutableMap; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -68,6 +74,7 @@ import org.powermock.core.classloader.annotations.PrepareForTest; import org.powermock.modules.junit4.PowerMockRunner; +import java.io.File; import java.io.IOException; import java.net.InetSocketAddress; import java.util.Arrays; @@ -133,7 +140,8 @@ public void before() throws Exception { mMasterProcess = PowerMockito.mock(AlluxioMasterProcess.class); ServletContext context = mock(ServletContext.class); mRegistry = new MasterRegistry(); - CoreMasterContext masterContext = MasterTestUtils.testMasterContext(); + CoreMasterContext masterContext = MasterTestUtils.testMasterContext(new NoopJournalSystem(), + null, new AlwaysPrimaryPrimarySelector()); mMetricsMaster = new MetricsMasterFactory().create(mRegistry, masterContext); mRegistry.add(MetricsMaster.class, mMetricsMaster); registerMockUfs(); @@ -296,4 +304,61 @@ public void isMounted() { 
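// disableGetInfo() above stubs the spied manager so the getInfo handler parks on the
// monitor for the request-info timeout and then throws, forcing the leader to fall back to
// the other follower. The same Mockito doAnswer shape in isolation (Handler and its method
// are illustrative stand-ins, not Alluxio classes):
import org.mockito.Mockito;

class Handler {
  Object handle(Object request) throws java.io.IOException {
    return new Object();
  }
}

class DisableGetInfoSketch {
  static Handler disabled(long timeoutMs) throws java.io.IOException {
    Handler spied = Mockito.spy(new Handler());
    Mockito.doAnswer(invocation -> {
      synchronized (spied) {
        spied.wait(timeoutMs); // park until the caller's timeout window has passed
      }
      throw new java.io.IOException("get info disabled");
    }).when(spied).handle(Mockito.any());
    return spied;
  }
}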
assertFalse(handler.isMounted(hdfsUri)); assertFalse(handler.isMounted(MetricsSystem.escape(new AlluxioURI(hdfsUri)))); } + + @Test + public void testGetWebUILogsByRegex() throws IOException { + File logsDir = mTestFolder.newFolder("logs"); + logsDir.mkdirs(); + String[] wantedFiles = new String[] { + "master.log", + "master.log.1", + "master.log.100", + "master.out", + "master.out.1", + "master.out.100", + "master.txt", + "master.gc.log", + "master.gc.log.2023-09-15-14", + "alluxio-master-exit-metrics-20230526-085548.json" + }; + Arrays.sort(wantedFiles); + String[] unwantedFiles = new String[] { + "master.log.a", + "master.loga", + "master.bin", + }; + + for (String fileName : wantedFiles) { + File file0 = new File(logsDir, fileName); + file0.createNewFile(); + } + for (String fileName : unwantedFiles) { + File file0 = new File(logsDir, fileName); + file0.createNewFile(); + } + + Configuration.set(PropertyKey.LOGS_DIR, logsDir.getPath()); + FileSystemMaster mockMaster = mock(FileSystemMaster.class); + BlockMaster mockBlockMaster = mock(BlockMaster.class); + + AlluxioMasterProcess masterProcess = PowerMockito.mock(AlluxioMasterProcess.class); + when(masterProcess.getMaster(FileSystemMaster.class)).thenReturn(mockMaster); + when(masterProcess.getMaster(BlockMaster.class)).thenReturn(mockBlockMaster); + when(mockBlockMaster.getGlobalStorageTierAssoc()).thenReturn( + new DefaultStorageTierAssoc( + PropertyKey.MASTER_TIERED_STORE_GLOBAL_LEVELS, + PropertyKey.Template.MASTER_TIERED_STORE_GLOBAL_LEVEL_ALIAS)); + + ServletContext context = mock(ServletContext.class); + when(context.getAttribute(MasterWebServer.ALLUXIO_MASTER_SERVLET_RESOURCE_KEY)).thenReturn( + masterProcess); + AlluxioMasterRestServiceHandler handler = new AlluxioMasterRestServiceHandler(context); + Response response = handler.getWebUILogs("", "0", "", "20"); + Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus()); + List fileInfos = ((MasterWebUILogs) response.getEntity()).getFileInfos(); + String[] actualFileNameArray = + fileInfos.stream().map(fileInfo -> fileInfo.getName()).toArray(String[]::new); + Arrays.sort(actualFileNameArray); + Assert.assertArrayEquals(wantedFiles, actualFileNameArray); + } } diff --git a/core/server/master/src/test/java/alluxio/master/meta/JournalSpaceMonitorTest.java b/core/server/master/src/test/java/alluxio/master/meta/JournalSpaceMonitorTest.java index eb638ae88800..8054599ee0a6 100644 --- a/core/server/master/src/test/java/alluxio/master/meta/JournalSpaceMonitorTest.java +++ b/core/server/master/src/test/java/alluxio/master/meta/JournalSpaceMonitorTest.java @@ -82,7 +82,7 @@ public void testLoggingPositive() throws IOException, InterruptedException { JournalSpaceMonitor monitor = Mockito.spy( new JournalSpaceMonitor(Paths.get(".").toAbsolutePath().toString(), 90)); doReturn(new CommandReturn(0, CMD_RETURN_MOCK)).when(monitor).getRawDiskInfo(); - monitor.heartbeat(); + monitor.heartbeat(Long.MAX_VALUE); assertTrue(mLogger.wasLoggedWithLevel("The journal disk /dev/nvme0n1p2 backing the journal " + "has only .* space left", Level.WARN)); } @@ -92,7 +92,7 @@ public void testLoggingNegative() throws IOException, InterruptedException { JournalSpaceMonitor monitor = Mockito.spy( new JournalSpaceMonitor(Paths.get(".").toAbsolutePath().toString(), 10)); doReturn(new CommandReturn(0, CMD_RETURN_MOCK)).when(monitor).getRawDiskInfo(); - monitor.heartbeat(); + monitor.heartbeat(Long.MAX_VALUE); assertFalse(mLogger.wasLoggedWithLevel("The journal disk /dev/nvme0n1p2 backing the 
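// testGetWebUILogsByRegex fixes the contract for which file names the logs endpoint lists.
// One pattern consistent with the wanted/unwanted fixtures above (an illustration of the
// filter's behavior, not necessarily the exact regex the handler uses):
import java.util.regex.Pattern;

final class LogNameFilterSketch {
  // Accepts master.log, master.out.100, master.gc.log.2023-09-15-14, master.txt and the
  // exit-metrics .json file; rejects master.log.a, master.loga and master.bin.
  static final Pattern LOG_FILE = Pattern.compile(".*\\.(log|out|txt|json)(\\.[0-9-]+)?");

  static boolean wanted(String name) {
    return LOG_FILE.matcher(name).matches();
  }
}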
journal " + "has only .* space left", Level.WARN)); } diff --git a/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreCheckpointTest.java b/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreCheckpointTest.java new file mode 100644 index 000000000000..72f384feb9e9 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreCheckpointTest.java @@ -0,0 +1,129 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.master.MasterUtils; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.CreateFileContext; +import alluxio.master.file.meta.Inode; +import alluxio.master.file.meta.InodeLockManager; +import alluxio.master.file.meta.MutableInodeDirectory; +import alluxio.master.journal.checkpoint.CheckpointInputStream; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.Collection; +import java.util.Optional; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +@RunWith(Parameterized.class) +public class InodeStoreCheckpointTest { + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][] { + {MetastoreType.HEAP, 0}, + {MetastoreType.ROCKS, PropertyKey.MASTER_METASTORE_INODE_CACHE_MAX_SIZE.getDefaultValue()}, + {MetastoreType.ROCKS, 0} + }); + } + + @Parameterized.Parameter(0) + public MetastoreType mType; + + @Parameterized.Parameter(1) + public int mCacheSize; + + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + private InodeStore mBaseInodeStore; + private InodeStore mNewInodeStore; + + private final MutableInodeDirectory mRoot = + MutableInodeDirectory.create(0, -1, "", CreateDirectoryContext.defaults()); + + private InodeStore createInodeStore() throws IOException { + return MasterUtils.getInodeStoreFactory(mFolder.newFolder().getAbsolutePath()) + .apply(new InodeLockManager()); + } + + @Before + public void before() throws IOException { + Configuration.set(PropertyKey.MASTER_INODE_METASTORE, mType); + Configuration.set(PropertyKey.MASTER_METASTORE_INODE_CACHE_MAX_SIZE, mCacheSize); + CreateDirectoryContext c = CreateDirectoryContext.defaults(); + CreateFileContext cf = CreateFileContext.defaults(); + mBaseInodeStore = createInodeStore(); + mBaseInodeStore.writeNewInode(MutableInodeDirectory.create(0, -1, "", c)); + mBaseInodeStore.writeNewInode(MutableInodeDirectory.create(1, 0, "one", c)); + mBaseInodeStore.writeNewInode(MutableInodeDirectory.create(2, 0, "two", c)); + mBaseInodeStore.writeNewInode(MutableInodeDirectory.create(3, 0, "three", c)); + 
mBaseInodeStore.remove(2L); + } + + @After + public void after() { + Optional root = mNewInodeStore.get(mRoot.getId()); + Assert.assertTrue(root.isPresent()); + Optional one = mNewInodeStore.get(1); + Assert.assertTrue(one.isPresent()); + Assert.assertEquals(0, one.get().getParentId()); + Assert.assertTrue(one.get().isDirectory()); + Assert.assertEquals("one", one.get().getName()); + Optional two = mNewInodeStore.get(2); + Assert.assertFalse(two.isPresent()); + Optional three = mNewInodeStore.get(3); + Assert.assertTrue(three.isPresent()); + Assert.assertEquals(0, three.get().getParentId()); + Assert.assertEquals("three", three.get().getName()); + + mBaseInodeStore.close(); + mNewInodeStore.close(); + } + + @Test + public void testOutputStream() throws IOException, InterruptedException { + File checkpoint = mFolder.newFile("checkpoint"); + try (OutputStream outputStream = Files.newOutputStream(checkpoint.toPath())) { + mBaseInodeStore.writeToCheckpoint(outputStream); + } + mNewInodeStore = createInodeStore(); + try (CheckpointInputStream inputStream = + new CheckpointInputStream(Files.newInputStream(checkpoint.toPath()))) { + mNewInodeStore.restoreFromCheckpoint(inputStream); + } + } + + @Test + public void testDirectory() throws IOException { + File dir = mFolder.newFolder("checkpoint"); + ExecutorService executor = Executors.newFixedThreadPool(2); + mBaseInodeStore.writeToCheckpoint(dir, executor).join(); + mNewInodeStore = createInodeStore(); + mNewInodeStore.restoreFromCheckpoint(dir, executor).join(); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTest.java b/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTest.java index 72ef4e72f4a3..320f9c287a3f 100644 --- a/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTest.java +++ b/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTest.java @@ -18,93 +18,36 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assume.assumeTrue; -import alluxio.AlluxioTestDirectory; import alluxio.ConfigurationRule; -import alluxio.concurrent.LockMode; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; -import alluxio.master.file.contexts.CreateDirectoryContext; -import alluxio.master.file.contexts.CreateFileContext; -import alluxio.master.file.meta.Edge; import alluxio.master.file.meta.Inode; import alluxio.master.file.meta.InodeLockManager; -import alluxio.master.file.meta.InodeView; import alluxio.master.file.meta.MutableInode; import alluxio.master.file.meta.MutableInodeDirectory; import alluxio.master.file.meta.MutableInodeFile; import alluxio.master.metastore.InodeStore.WriteBatch; import alluxio.master.metastore.caching.CachingInodeStore; -import alluxio.master.metastore.heap.HeapInodeStore; import alluxio.master.metastore.rocks.RocksInodeStore; import alluxio.resource.CloseableIterator; -import alluxio.resource.LockResource; -import com.google.common.collect.ImmutableMap; -import io.netty.util.ResourceLeakDetector; -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; import org.rocksdb.RocksDBException; import java.io.File; import java.nio.charset.Charset; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Optional; import java.util.function.Function; @RunWith(Parameterized.class) 
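// InodeStoreCheckpointTest drives a write-then-restore round trip: serialize the populated
// base store, rebuild a fresh store from the checkpoint, and let after() verify that the
// surviving inodes (including the absence of the removed id 2) come back correctly. The
// same round-trip shape in miniature over a plain map (all names illustrative):
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;

final class CheckpointRoundTripSketch {
  static void writeCheckpoint(Map<Long, String> store, Path file) throws IOException {
    try (ObjectOutputStream out = new ObjectOutputStream(Files.newOutputStream(file))) {
      out.writeObject(new HashMap<>(store)); // snapshot the state to the checkpoint file
    }
  }

  @SuppressWarnings("unchecked")
  static Map<Long, String> restoreCheckpoint(Path file)
      throws IOException, ClassNotFoundException {
    try (ObjectInputStream in = new ObjectInputStream(Files.newInputStream(file))) {
      return (Map<Long, String>) in.readObject(); // rebuild state from the checkpoint
    }
  }
}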
-public class InodeStoreTest { - private static final int CACHE_SIZE = 16; - private static String sDir; - private static final String CONF_NAME = "/rocks-inode.ini"; - - @Parameters - public static Iterable> parameters() throws Exception { - sDir = - AlluxioTestDirectory.createTemporaryDirectory("inode-store-test").getAbsolutePath(); - File confFile = new File(sDir + CONF_NAME); - writeStringToFile(confFile, ROCKS_CONFIG, (Charset) null); - - return Arrays.asList( - lockManager -> new HeapInodeStore(), - lockManager -> new RocksInodeStore(sDir), - lockManager -> new CachingInodeStore(new RocksInodeStore(sDir), lockManager)); - } - - @Rule - public ConfigurationRule mConf = new ConfigurationRule( - ImmutableMap.of(PropertyKey.MASTER_METASTORE_INODE_CACHE_MAX_SIZE, CACHE_SIZE, - PropertyKey.MASTER_METASTORE_INODE_CACHE_EVICT_BATCH_SIZE, 5, - PropertyKey.LEAK_DETECTOR_LEVEL, ResourceLeakDetector.Level.PARANOID, - PropertyKey.LEAK_DETECTOR_EXIT_ON_LEAK, true), - Configuration.modifiableGlobal()); - - private final MutableInodeDirectory mRoot = inodeDir(0, -1, ""); - - private final Function mStoreBuilder; - private InodeStore mStore; - private InodeLockManager mLockManager; - +public class InodeStoreTest extends InodeStoreTestBase { public InodeStoreTest(Function store) { - mStoreBuilder = store; - } - - @Before - public void before() { - mLockManager = new InodeLockManager(); - mStore = mStoreBuilder.apply(mLockManager); - } - - @After - public void after() { - mStore.close(); + super(store); } @Test @@ -304,63 +247,4 @@ public void manyOperations() { assertEquals(0, CloseableIterator.size(mStore.getChildren(mStore.get(middleDir - 1).get().asDirectory()))); } - - private void writeInode(MutableInode inode) { - try (LockResource lr = mLockManager.lockInode(inode, LockMode.WRITE, false)) { - mStore.writeInode(inode); - } - } - - private void writeEdge(MutableInode parent, MutableInode child) { - try (LockResource lr = - mLockManager.lockEdge(new Edge(parent.getId(), child.getName()), - LockMode.WRITE, false)) { - mStore.addChild(parent.getId(), child); - } - } - - private void removeInode(InodeView inode) { - try (LockResource lr = mLockManager.lockInode(inode, LockMode.WRITE, false)) { - mStore.remove(inode); - } - } - - private void removeParentEdge(InodeView child) { - try (LockResource lr = mLockManager - .lockEdge(new Edge(child.getParentId(), child.getName()), LockMode.WRITE, false)) { - mStore.removeChild(child.getParentId(), child.getName()); - } - } - - private static MutableInodeDirectory inodeDir(long id, long parentId, String name) { - return MutableInodeDirectory.create(id, parentId, name, CreateDirectoryContext.defaults()); - } - - private static MutableInodeFile inodeFile(long containerId, long parentId, String name) { - return MutableInodeFile.create(containerId, parentId, name, 0, CreateFileContext.defaults()); - } - - // RocksDB configuration options used for the unit tests - private static final String ROCKS_CONFIG = "[Version]\n" - + " rocksdb_version=7.0.3\n" - + " options_file_version=1.1\n" - + "\n" - + "[DBOptions]\n" - + " create_missing_column_families=true\n" - + " create_if_missing=true\n" - + "\n" - + "\n" - + "[CFOptions \"default\"]\n" - + "\n" - + " \n" - + "[TableOptions/BlockBasedTable \"default\"]\n" - + "\n" - + "\n" - + "[CFOptions \"inodes\"]\n" - + " \n" - + "[TableOptions/BlockBasedTable \"inodes\"]\n" - + " \n" - + "\n" - + "[CFOptions \"edges\"]\n" - + " \n"; } diff --git 
a/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTestBase.java b/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTestBase.java new file mode 100644 index 000000000000..3d7ab1abe0e4 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTestBase.java @@ -0,0 +1,151 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore; + +import static org.apache.commons.io.FileUtils.writeStringToFile; + +import alluxio.AlluxioTestDirectory; +import alluxio.ConfigurationRule; +import alluxio.concurrent.LockMode; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.CreateFileContext; +import alluxio.master.file.meta.Edge; +import alluxio.master.file.meta.InodeLockManager; +import alluxio.master.file.meta.InodeView; +import alluxio.master.file.meta.MutableInode; +import alluxio.master.file.meta.MutableInodeDirectory; +import alluxio.master.file.meta.MutableInodeFile; +import alluxio.master.metastore.caching.CachingInodeStore; +import alluxio.master.metastore.heap.HeapInodeStore; +import alluxio.master.metastore.rocks.RocksInodeStore; +import alluxio.resource.LockResource; + +import com.google.common.collect.ImmutableMap; +import io.netty.util.ResourceLeakDetector; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.runners.Parameterized.Parameters; + +import java.io.File; +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.function.Function; + +public class InodeStoreTestBase { + protected static final int CACHE_SIZE = 16; + protected static String sDir; + protected static final String CONF_NAME = "/rocks-inode.ini"; + + @Parameters + public static Iterable> parameters() throws Exception { + sDir = + AlluxioTestDirectory.createTemporaryDirectory("inode-store-test").getAbsolutePath(); + File confFile = new File(sDir + CONF_NAME); + writeStringToFile(confFile, ROCKS_CONFIG, (Charset) null); + + return Arrays.asList( + lockManager -> new HeapInodeStore(), + lockManager -> new RocksInodeStore(sDir), + lockManager -> new CachingInodeStore(new RocksInodeStore(sDir), lockManager)); + } + + @Rule + public ConfigurationRule mConf = new ConfigurationRule( + ImmutableMap.of(PropertyKey.MASTER_METASTORE_INODE_CACHE_MAX_SIZE, CACHE_SIZE, + PropertyKey.MASTER_METASTORE_INODE_CACHE_EVICT_BATCH_SIZE, 5, + PropertyKey.LEAK_DETECTOR_LEVEL, ResourceLeakDetector.Level.PARANOID, + PropertyKey.LEAK_DETECTOR_EXIT_ON_LEAK, true), + Configuration.modifiableGlobal()); + + protected final MutableInodeDirectory mRoot = inodeDir(0, -1, ""); + + protected final Function mStoreBuilder; + protected InodeStore mStore; + protected InodeLockManager mLockManager; + + public InodeStoreTestBase(Function store) { + mStoreBuilder = store; + } + + @Before + public void before() { + mLockManager = new InodeLockManager(); + mStore = mStoreBuilder.apply(mLockManager); + } + + @After + public void 
after() { + mStore.close(); + } + + protected void writeInode(MutableInode inode) { + try (LockResource lr = mLockManager.lockInode(inode, LockMode.WRITE, false)) { + mStore.writeInode(inode); + } + } + + protected void writeEdge(MutableInode parent, MutableInode child) { + try (LockResource lr = + mLockManager.lockEdge(new Edge(parent.getId(), child.getName()), + LockMode.WRITE, false)) { + mStore.addChild(parent.getId(), child); + } + } + + protected void removeInode(InodeView inode) { + try (LockResource lr = mLockManager.lockInode(inode, LockMode.WRITE, false)) { + mStore.remove(inode); + } + } + + protected void removeParentEdge(InodeView child) { + try (LockResource lr = mLockManager + .lockEdge(new Edge(child.getParentId(), child.getName()), LockMode.WRITE, false)) { + mStore.removeChild(child.getParentId(), child.getName()); + } + } + + protected static MutableInodeDirectory inodeDir(long id, long parentId, String name) { + return MutableInodeDirectory.create(id, parentId, name, CreateDirectoryContext.defaults()); + } + + protected static MutableInodeFile inodeFile(long containerId, long parentId, String name) { + return MutableInodeFile.create(containerId, parentId, name, 0, CreateFileContext.defaults()); + } + + // RocksDB configuration options used for the unit tests + private static final String ROCKS_CONFIG = "[Version]\n" + + " rocksdb_version=7.0.3\n" + + " options_file_version=1.1\n" + + "\n" + + "[DBOptions]\n" + + " create_missing_column_families=true\n" + + " create_if_missing=true\n" + + "\n" + + "\n" + + "[CFOptions \"default\"]\n" + + "\n" + + " \n" + + "[TableOptions/BlockBasedTable \"default\"]\n" + + "\n" + + "\n" + + "[CFOptions \"inodes\"]\n" + + " \n" + + "[TableOptions/BlockBasedTable \"inodes\"]\n" + + " \n" + + "\n" + + "[CFOptions \"edges\"]\n" + + " \n"; +} diff --git a/core/server/master/src/test/java/alluxio/master/metastore/RecursiveInodeIteratorTest.java b/core/server/master/src/test/java/alluxio/master/metastore/RecursiveInodeIteratorTest.java new file mode 100644 index 000000000000..871b9b7b7803 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/metastore/RecursiveInodeIteratorTest.java @@ -0,0 +1,415 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
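// The refactor above pulls the @Parameters factory and shared fixtures into
// InodeStoreTestBase so that InodeStoreTest (and RecursiveInodeIteratorTest below) only
// forward the injected store factory. A stripped-down sketch of that JUnit4 layout with
// stand-in types; JUnit discovers the inherited @Parameters method on the base class:
import java.util.Arrays;
import java.util.function.Supplier;

import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

class SketchTestBase {
  @Parameters
  public static Iterable<Supplier<String>> parameters() {
    return Arrays.asList(() -> "heap-store", () -> "rocks-store");
  }

  protected final Supplier<String> mFactory;

  public SketchTestBase(Supplier<String> factory) {
    mFactory = factory;
  }
}

@RunWith(Parameterized.class)
class SketchTest extends SketchTestBase {
  public SketchTest(Supplier<String> factory) {
    super(factory); // the base class holds the parameters; subclasses just pass through
  }
}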
+ */ + +package alluxio.master.metastore; + +import static org.junit.Assert.assertEquals; + +import alluxio.AlluxioURI; +import alluxio.file.options.DescendantType; +import alluxio.master.block.ContainerIdGenerable; +import alluxio.master.file.meta.InodeDirectoryIdGenerator; +import alluxio.master.file.meta.InodeIterationResult; +import alluxio.master.file.meta.InodeLockManager; +import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.LockedInodePath; +import alluxio.master.file.meta.LockingScheme; +import alluxio.master.file.meta.MountTable; +import alluxio.master.file.meta.MutableInode; +import alluxio.master.file.meta.options.MountInfo; +import alluxio.master.journal.NoopJournalContext; +import alluxio.underfs.UfsManager; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.Mockito; + +import java.time.Clock; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.function.Function; + +@RunWith(Parameterized.class) +public class RecursiveInodeIteratorTest extends InodeStoreTestBase { + + public RecursiveInodeIteratorTest(Function store) { + super(store); + } + + MutableInode mInodeA = inodeDir(1, 0, "a"); + MutableInode mInodeAB = inodeDir(2, 1, "b"); + MutableInode mInodeABC = inodeDir(3, 2, "c"); + MutableInode mInodeABCF1 = inodeFile(4, 3, "f1"); + MutableInode mInodeABCF2 = inodeFile(5, 3, "f2"); + MutableInode mInodeAC = inodeDir(6, 1, "c"); + MutableInode mInodeACF1 = inodeFile(7, 6, "f1"); + MutableInode mInodeACF2 = inodeFile(8, 6, "f2"); + MutableInode mInodeACF3 = inodeFile(9, 6, "f3"); + MutableInode mInodeAF1 = inodeFile(10, 1, "f1"); + MutableInode mInodeB = inodeDir(11, 0, "b"); + MutableInode mInodeC = inodeDir(12, 0, "c"); + MutableInode mInodeF1 = inodeFile(13, 0, "f1"); + MutableInode mInodeF2 = inodeFile(14, 0, "f2"); + MutableInode mInodeG = inodeDir(15, 0, "g"); + + /* + / + /a + /a/b + /a/b/c + /a/b/c/f1 + /a/b/c/f2 + /a/c + /a/c/f1 + /a/c/f2 + /a/c/f3 + /a/f1 + /b + /c + /f1 + /f2 + /g + */ + private void createInodeTree() { + writeInode(mRoot); + writeInode(mInodeA); + writeInode(mInodeAB); + writeInode(mInodeABC); + writeInode(mInodeABCF1); + writeInode(mInodeABCF2); + writeInode(mInodeAC); + writeInode(mInodeACF1); + writeInode(mInodeACF2); + writeInode(mInodeACF3); + writeInode(mInodeAF1); + writeInode(mInodeB); + writeInode(mInodeC); + writeInode(mInodeF1); + writeInode(mInodeF2); + writeInode(mInodeG); + + writeEdge(mRoot, mInodeA); + writeEdge(mInodeA, mInodeAB); + writeEdge(mInodeAB, mInodeABC); + writeEdge(mInodeABC, mInodeABCF1); + writeEdge(mInodeABC, mInodeABCF2); + writeEdge(mInodeA, mInodeAC); + writeEdge(mInodeAC, mInodeACF1); + writeEdge(mInodeAC, mInodeACF2); + writeEdge(mInodeAC, mInodeACF3); + writeEdge(mInodeA, mInodeAF1); + writeEdge(mRoot, mInodeB); + writeEdge(mRoot, mInodeC); + writeEdge(mRoot, mInodeF1); + writeEdge(mRoot, mInodeF2); + writeEdge(mRoot, mInodeG); + } + + @Test + public void recursiveListing() throws Exception { + createInodeTree(); + + List> inodes = Arrays.asList( + mRoot, mInodeA, mInodeAB, mInodeABC, mInodeABCF1, mInodeABCF2, mInodeAC, mInodeACF1, + mInodeACF2, mInodeACF3, mInodeAF1, mInodeB, mInodeC, mInodeF1, mInodeF2, mInodeG + ); + + List paths = Arrays.asList( + "/", + "/a", + "/a/b", + "/a/b/c", + "/a/b/c/f1", + "/a/b/c/f2", + "/a/c", + "/a/c/f1", + "/a/c/f2", + "/a/c/f3", + "/a/f1", + "/b", + "/c", + "/f1", + "/f2", + "/g" + ); + + InodeTree tree = new InodeTree(mStore, 
Mockito.mock(ContainerIdGenerable.class), + Mockito.mock(InodeDirectoryIdGenerator.class), new MountTable( + Mockito.mock(UfsManager.class), Mockito.mock(MountInfo.class), Clock.systemUTC()), + mLockManager); + + LockingScheme lockingScheme = new LockingScheme(new AlluxioURI("/"), + InodeTree.LockPattern.READ, false); + int idx = 0; + try (LockedInodePath lockedPath = + tree.lockInodePath(lockingScheme, NoopJournalContext.INSTANCE)) { + RecursiveInodeIterator iterator = (RecursiveInodeIterator) + mStore.getSkippableChildrenIterator(ReadOption.defaults(), + DescendantType.ALL, true, lockedPath); + while (iterator.hasNext()) { + InodeIterationResult result = iterator.next(); + assertEquals(paths.get(idx), result.getLockedPath().getUri().getPath()); + result.getLockedPath().traverse(); + assertEquals(inodes.get(idx).getId(), result.getInode().getId()); + assertEquals(inodes.get(idx).getId(), result.getLockedPath().getInode().getId()); + idx++; + } + iterator.close(); + } + } + + @Test + public void recursiveListingSkipChildren() throws Exception { + /* + / + /a + /a/b -> SKIP CHILDREN + /a/b/c (SKIPPED) + /a/b/c/f1 (SKIPPED) + /a/b/c/f2 (SKIPPED) + /a/c -> SKIP CHILDREN + /a/c/f1 (SKIPPED) + /a/c/f2 (SKIPPED) + /a/c/f3 (SKIPPED) + /a/f1 + /b -> SKIP CHILDREN + /c + /f1 + /f2 + /g -> SKIP CHILDREN + */ + + createInodeTree(); + + List> inodes = Arrays.asList( + mRoot, mInodeA, mInodeAB, mInodeAC, mInodeAF1, mInodeB, mInodeC, mInodeF1, mInodeF2, mInodeG + ); + + List paths = Arrays.asList( + "/", + "/a", + "/a/b", + "/a/c", + "/a/f1", + "/b", + "/c", + "/f1", + "/f2", + "/g" + ); + + InodeTree tree = new InodeTree(mStore, Mockito.mock(ContainerIdGenerable.class), + Mockito.mock(InodeDirectoryIdGenerator.class), new MountTable( + Mockito.mock(UfsManager.class), Mockito.mock(MountInfo.class), Clock.systemUTC()), + mLockManager); + + LockingScheme lockingScheme = new LockingScheme(new AlluxioURI("/"), + InodeTree.LockPattern.READ, false); + int idx = 0; + try (LockedInodePath lockedPath = + tree.lockInodePath(lockingScheme, NoopJournalContext.INSTANCE)) { + RecursiveInodeIterator iterator = (RecursiveInodeIterator) + mStore.getSkippableChildrenIterator(ReadOption.defaults(), + DescendantType.ALL, true, lockedPath); + while (iterator.hasNext()) { + InodeIterationResult result = iterator.next(); + assertEquals(paths.get(idx), result.getLockedPath().getUri().getPath()); + result.getLockedPath().traverse(); + assertEquals(inodes.get(idx).getId(), result.getInode().getId()); + assertEquals(inodes.get(idx).getId(), result.getLockedPath().getInode().getId()); + // The locked inode path will become stale after skipChildrenOfTheCurrent() is called. 
+ if (result.getLockedPath().getUri().getPath().equals("/a/b") + || result.getLockedPath().getUri().getPath().equals("/b") + || result.getLockedPath().getUri().getPath().equals("/a/c") + || result.getLockedPath().getUri().getPath().equals("/g")) { + iterator.skipChildrenOfTheCurrent(); + } + idx++; + } + iterator.close(); + } + } + + @Test + public void recursiveListingStartFrom1() throws Exception { + /* + / + /a + /a/b + /a/b/c + /a/b/c/f1 (SKIPPED) + /a/b/c/f2 + /a/c + /a/c/f1 + /a/c/f2 + /a/c/f3 + /a/f1 + /b + /c + /f1 + /f2 + /g + */ + + createInodeTree(); + + List> inodes = Arrays.asList( + mRoot, mInodeA, mInodeAB, mInodeABC, mInodeABCF2, mInodeAC, mInodeACF1, mInodeACF2, + mInodeACF3, mInodeAF1, mInodeB, mInodeC, mInodeF1, mInodeF2, mInodeG + ); + + List paths = Arrays.asList( + "/", + "/a", + "/a/b", + "/a/b/c", + "/a/b/c/f2", + "/a/c", + "/a/c/f1", + "/a/c/f2", + "/a/c/f3", + "/a/f1", + "/b", + "/c", + "/f1", + "/f2", + "/g" + ); + + InodeTree tree = new InodeTree(mStore, Mockito.mock(ContainerIdGenerable.class), + Mockito.mock(InodeDirectoryIdGenerator.class), new MountTable( + Mockito.mock(UfsManager.class), Mockito.mock(MountInfo.class), Clock.systemUTC()), + mLockManager); + + LockingScheme lockingScheme = new LockingScheme(new AlluxioURI("/"), + InodeTree.LockPattern.READ, false); + int idx = 0; + try (LockedInodePath lockedPath = + tree.lockInodePath(lockingScheme, NoopJournalContext.INSTANCE)) { + RecursiveInodeIterator iterator = (RecursiveInodeIterator) + mStore.getSkippableChildrenIterator( + ReadOption.newBuilder().setReadFrom("a/b/c/f11").build(), + DescendantType.ALL, true, lockedPath); + while (iterator.hasNext()) { + InodeIterationResult result = iterator.next(); + assertEquals(paths.get(idx), result.getLockedPath().getUri().getPath()); + result.getLockedPath().traverse(); + assertEquals(inodes.get(idx).getId(), result.getInode().getId()); + assertEquals(inodes.get(idx).getId(), result.getLockedPath().getInode().getId()); + idx++; + } + iterator.close(); + } + } + + @Test + public void recursiveListingStartFrom2() throws Exception { + /* + / + /a + /a/b (SKIPPED) + /a/b/c (SKIPPED) + /a/b/c/f1 (SKIPPED) + /a/b/c/f2 (SKIPPED) + /a/c + /a/c/f1 (SKIPPED) + /a/c/f2 (SKIPPED) + /a/c/f3 + /a/f1 + /b + /c + /f1 + /f2 + /g + */ + + createInodeTree(); + + List> inodes = Arrays.asList( + mRoot, mInodeA, mInodeAC, mInodeACF3, mInodeAF1, mInodeB, mInodeC, mInodeF1, mInodeF2, + mInodeG + ); + + List paths = Arrays.asList( + "/", + "/a", + "/a/c", + "/a/c/f3", + "/a/f1", + "/b", + "/c", + "/f1", + "/f2", + "/g" + ); + + InodeTree tree = new InodeTree(mStore, Mockito.mock(ContainerIdGenerable.class), + Mockito.mock(InodeDirectoryIdGenerator.class), new MountTable( + Mockito.mock(UfsManager.class), Mockito.mock(MountInfo.class), Clock.systemUTC()), + mLockManager); + + LockingScheme lockingScheme = new LockingScheme(new AlluxioURI("/"), + InodeTree.LockPattern.READ, false); + int idx = 0; + try (LockedInodePath lockedPath = + tree.lockInodePath(lockingScheme, NoopJournalContext.INSTANCE)) { + RecursiveInodeIterator iterator = (RecursiveInodeIterator) + mStore.getSkippableChildrenIterator( + ReadOption.newBuilder().setReadFrom("a/c/f3").build(), + DescendantType.ALL, true, lockedPath); + while (iterator.hasNext()) { + InodeIterationResult result = iterator.next(); + assertEquals(paths.get(idx), result.getLockedPath().getUri().getPath()); + result.getLockedPath().traverse(); + assertEquals(inodes.get(idx).getId(), result.getInode().getId()); + assertEquals(inodes.get(idx).getId(), 
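// The skip-children and readFrom cases above define two traversal behaviors: pruning the
// subtree under the current inode mid-iteration, and resuming the listing from a given
// path. A toy depth-first walk over a sorted parent-to-children map showing the pruning
// semantics (not Alluxio's RecursiveInodeIterator):
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.List;
import java.util.Map;
import java.util.Set;

final class DfsSketch {
  static List<String> walk(Map<String, List<String>> children, Set<String> skipChildrenOf) {
    List<String> out = new ArrayList<>();
    Deque<String> stack = new ArrayDeque<>();
    stack.push("/");
    while (!stack.isEmpty()) {
      String path = stack.pop();
      out.add(path);
      if (skipChildrenOf.contains(path)) {
        continue; // like skipChildrenOfTheCurrent(): the whole subtree is skipped
      }
      List<String> kids = children.getOrDefault(path, Collections.emptyList());
      for (int i = kids.size() - 1; i >= 0; i--) { // reversed so pop order stays sorted
        stack.push(path.equals("/") ? "/" + kids.get(i) : path + "/" + kids.get(i));
      }
    }
    return out;
  }
}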
result.getLockedPath().getInode().getId()); + idx++; + } + iterator.close(); + } + } + + @Test + public void recursiveListingStartFromSkipAll() throws Exception { + createInodeTree(); + + List> inodes = Collections.singletonList(mRoot); + + List paths = Collections.singletonList("/"); + + InodeTree tree = new InodeTree(mStore, Mockito.mock(ContainerIdGenerable.class), + Mockito.mock(InodeDirectoryIdGenerator.class), new MountTable( + Mockito.mock(UfsManager.class), Mockito.mock(MountInfo.class), Clock.systemUTC()), + mLockManager); + + LockingScheme lockingScheme = new LockingScheme(new AlluxioURI("/"), + InodeTree.LockPattern.READ, false); + int idx = 0; + try (LockedInodePath lockedPath = + tree.lockInodePath(lockingScheme, NoopJournalContext.INSTANCE)) { + RecursiveInodeIterator iterator = (RecursiveInodeIterator) + mStore.getSkippableChildrenIterator( + ReadOption.newBuilder().setReadFrom("z").build(), + DescendantType.ALL, true, lockedPath); + while (iterator.hasNext()) { + InodeIterationResult result = iterator.next(); + assertEquals(paths.get(idx), result.getLockedPath().getUri().getPath()); + result.getLockedPath().traverse(); + assertEquals(inodes.get(idx).getId(), result.getInode().getId()); + assertEquals(inodes.get(idx).getId(), result.getLockedPath().getInode().getId()); + idx++; + } + iterator.close(); + } + } +} diff --git a/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksBlockMetaStoreTest.java b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksBlockMetaStoreTest.java new file mode 100644 index 000000000000..d6a3072febb8 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksBlockMetaStoreTest.java @@ -0,0 +1,275 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.metastore.rocks; + +import static alluxio.master.metastore.rocks.RocksStoreTestUtils.waitForReaders; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.master.journal.checkpoint.CheckpointInputStream; +import alluxio.master.metastore.BlockMetaStore; +import alluxio.proto.meta.Block; +import alluxio.resource.CloseableIterator; +import alluxio.util.ThreadFactoryUtils; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.Nullable; + +public class RocksBlockMetaStoreTest { + private static final int FILE_NUMBER = 400; + private static final int THREAD_NUMBER = 20; + + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + public String mPath; + public RocksBlockMetaStore mStore; + + private ExecutorService mThreadPool; + + @Before + public void setUp() throws Exception { + Configuration.set(PropertyKey.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT, "500ms"); + Configuration.set(PropertyKey.TEST_MODE, true); + // Wait for a shorter period of time in test + Configuration.set(PropertyKey.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT, "1s"); + mPath = mFolder.newFolder().getAbsolutePath(); + mStore = new RocksBlockMetaStore(mFolder.newFolder().getAbsolutePath()); + mThreadPool = Executors.newCachedThreadPool(ThreadFactoryUtils.build("test-executor-%d", true)); + } + + @After + public void tearDown() throws Exception { + mStore.close(); + mThreadPool.shutdownNow(); + mThreadPool = null; + } + + @Test + public void escapingIteratorExceptionInNext() throws Exception { + prepareBlocks(FILE_NUMBER); + + FlakyRocksBlockStore delegateStore = new FlakyRocksBlockStore(mPath, mStore); + AtomicReference exception = new AtomicReference<>(null); + try (CloseableIterator brokenIter = + delegateStore.getCloseableIterator(false, true)) { + while (brokenIter.hasNext()) { + brokenIter.next(); + } + } catch (Exception e) { + exception.set(e); + } + assertNotNull(exception.get()); + + // Even if the iter is flaky, the lock and ref count are managed correctly + // A close action will look at the ref count and err if there is a lock leak + assertEquals(0, mStore.getRocksStore().getSharedLockCount()); + mStore.close(); + } + + @Test + public void escapingIteratorExceptionInHasNext() throws Exception { + prepareBlocks(FILE_NUMBER); + + FlakyRocksBlockStore delegateStore = new FlakyRocksBlockStore(mPath, mStore); + AtomicReference exception = new AtomicReference<>(null); + try (CloseableIterator brokenIter = + delegateStore.getCloseableIterator(true, false)) { + while (brokenIter.hasNext()) { + brokenIter.next(); + } + } catch (Exception e) { + exception.set(e); + } + assertNotNull(exception.get()); + + // Even if the iter is flaky, the lock and ref count are managed correctly + // A close 
action will look at the ref count and err if there is a lock leak + assertEquals(0, mStore.getRocksStore().getSharedLockCount()); + mStore.close(); + } + + @Test + public void longRunningIterAndCheckpoint() throws Exception { + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. + Configuration.set(PropertyKey.TEST_MODE, false); + prepareBlocks(FILE_NUMBER); + + // Create a bunch of long running iterators on the InodeStore + CountDownLatch readerLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch restoreLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + submitIterJob(THREAD_NUMBER, errors, results, readerLatch, restoreLatch); + + // Await for the 20 threads to be iterating in the middle, then trigger the shutdown event + readerLatch.await(); + File checkpointFile = File.createTempFile("checkpoint-for-recovery", ""); + try (BufferedOutputStream out = + new BufferedOutputStream(new FileOutputStream(checkpointFile))) { + mStore.writeToCheckpoint(out); + } + assertTrue(Files.size(checkpointFile.toPath()) > 0); + + // Verify that the iterators can still run + restoreLatch.countDown(); + waitForReaders(futures); + + // All iterators should abort because the RocksDB contents have changed + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + } + + @Test + public void longRunningIterAndRestore() throws Exception { + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. + Configuration.set(PropertyKey.TEST_MODE, false); + prepareBlocks(FILE_NUMBER); + + // Prepare a checkpoint file + File checkpointFile = File.createTempFile("checkpoint-for-recovery", ""); + try (BufferedOutputStream out = + new BufferedOutputStream(new FileOutputStream(checkpointFile))) { + mStore.writeToCheckpoint(out); + } + + // Create a bunch of long running iterators on the InodeStore + CountDownLatch readerLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch restoreLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + submitIterJob(THREAD_NUMBER, errors, results, readerLatch, restoreLatch); + + // Await for the 20 threads to be iterating in the middle, then trigger the shutdown event + readerLatch.await(); + try (CheckpointInputStream in = new CheckpointInputStream( + (new DataInputStream(new FileInputStream(checkpointFile))))) { + mStore.restoreFromCheckpoint(in); + } + + // Verify that the iterators can still run + restoreLatch.countDown(); + waitForReaders(futures); + + // All iterators should abort because the RocksDB contents have changed + assertEquals(THREAD_NUMBER, errors.size()); + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(0, completed); + long aborted = results.stream().filter(n -> n == 10).count(); + assertEquals(THREAD_NUMBER, aborted); + } + + public static class FlakyRocksBlockStore extends RocksInodeStore { + private final RocksBlockMetaStore mDelegate; + + public FlakyRocksBlockStore(String baseDir, RocksBlockMetaStore delegate) { + super(baseDir); + mDelegate = delegate; + } + + public CloseableIterator getCloseableIterator( + boolean hasNextIsFlaky, boolean nextIsFlaky) { 
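// escapingIteratorExceptionInNext/HasNext assert that a throw from the iterator still
// releases the shared lock, so getSharedLockCount() returns to 0 and close() does not
// detect a leak. A generic wrapper achieving that guarantee by closing on the failure
// path (a sketch, not RocksStore's actual bookkeeping):
import java.util.Iterator;

class GuardedIterator<T> implements Iterator<T>, AutoCloseable {
  private final Iterator<T> mDelegate;
  private final Runnable mReleaseLock;
  private boolean mClosed;

  GuardedIterator(Iterator<T> delegate, Runnable releaseLock) {
    mDelegate = delegate;
    mReleaseLock = releaseLock;
  }

  @Override
  public boolean hasNext() {
    try {
      return mDelegate.hasNext();
    } catch (RuntimeException e) {
      close(); // do not leak the shared lock when the underlying iterator throws
      throw e;
    }
  }

  @Override
  public T next() {
    try {
      return mDelegate.next();
    } catch (RuntimeException e) {
      close();
      throw e;
    }
  }

  @Override
  public void close() {
    if (!mClosed) {
      mClosed = true;
      mReleaseLock.run(); // idempotent release keeps the ref count balanced
    }
  }
}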
+ CloseableIterator iter = mDelegate.getCloseableIterator(); + + // This iterator is flaky + return new CloseableIterator(iter) { + private int mCounter = 0; + + @Override + public void closeResource() { + iter.closeResource(); + } + + @Override + public boolean hasNext() { + if (mCounter == 5 && hasNextIsFlaky) { + throw new RuntimeException("Unexpected exception in iterator"); + } + return iter.hasNext(); + } + + @Override + public BlockMetaStore.Block next() { + mCounter++; + if (mCounter == 5 && nextIsFlaky) { + throw new RuntimeException("Unexpected exception in iterator"); + } + return iter.next(); + } + }; + } + } + + private void prepareBlocks(int blockCount) throws Exception { + for (int i = 1; i < blockCount + 1; i++) { + mStore.putBlock(i, Block.BlockMeta.newBuilder().setLength(100).build()); + } + } + + private List> submitIterJob(int threadCount, + ArrayBlockingQueue errors, ArrayBlockingQueue results, + @Nullable CountDownLatch readersRunningLatch, + @Nullable CountDownLatch writerCompletedLatch) { + List> futures = new ArrayList<>(); + for (int k = 0; k < threadCount; k++) { + futures.add(mThreadPool.submit(() -> { + int listedCount = 0; + try (CloseableIterator iter = mStore.getCloseableIterator()) { + while (iter.hasNext()) { + if (listedCount == 10 && readersRunningLatch != null) { + readersRunningLatch.countDown(); + if (writerCompletedLatch != null) { + // Pretend the reader is blocked and will wake up after the writer is done + writerCompletedLatch.await(); + } + } + iter.next(); + listedCount++; + } + } catch (Exception e) { + errors.add(e); + } finally { + results.add(listedCount); + } + return null; + })); + } + return futures; + } +} diff --git a/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksInodeStoreTest.java b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksInodeStoreTest.java index 0e4a4561bb07..d4f4b619e7ad 100644 --- a/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksInodeStoreTest.java +++ b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksInodeStoreTest.java @@ -11,45 +11,732 @@ package alluxio.master.metastore.rocks; +import static alluxio.master.metastore.rocks.RocksStoreTestUtils.waitForReaders; import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.meta.InodeView; +import alluxio.master.file.meta.MutableInode; import alluxio.master.file.meta.MutableInodeDirectory; +import alluxio.master.journal.checkpoint.CheckpointInputStream; import alluxio.master.metastore.InodeStore.WriteBatch; +import alluxio.master.metastore.ReadOption; +import alluxio.resource.CloseableIterator; +import alluxio.util.ThreadFactoryUtils; +import org.junit.After; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.CountDownLatch; +import 
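// submitIterJob coordinates the reader threads and the writer with two latches: each
// reader counts down once it is mid-iteration and then parks until the writer finishes
// its checkpoint/restore. The bare synchronization pattern (names illustrative):
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

final class LatchHandshakeSketch {
  public static void main(String[] args) throws Exception {
    int readerCount = 4;
    CountDownLatch readersRunning = new CountDownLatch(readerCount);
    CountDownLatch writerDone = new CountDownLatch(1);
    ExecutorService pool = Executors.newCachedThreadPool();
    for (int i = 0; i < readerCount; i++) {
      pool.submit((Callable<Void>) () -> {
        readersRunning.countDown(); // "I am in the middle of iterating"
        writerDone.await();         // park until the writer has mutated the store
        return null;                // then finish (or abort) the iteration
      });
    }
    readersRunning.await(); // the writer starts only once every reader is mid-iteration
    // ... write a checkpoint / restore / clear the store here ...
    writerDone.countDown(); // wake the readers so they observe the writer's effect
    pool.shutdown();
    pool.awaitTermination(5, TimeUnit.SECONDS);
  }
}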
java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiFunction; +import javax.annotation.Nullable; public class RocksInodeStoreTest { + private static final int FILE_NUMBER = 400; + private static final int THREAD_NUMBER = 20; + @Rule public TemporaryFolder mFolder = new TemporaryFolder(); + public String mPath; + public RocksInodeStore mStore; + + private ExecutorService mThreadPool; + + // Functional wrappers of RocksDB r/w actions + private QuadFunction, ArrayBlockingQueue, + CountDownLatch, CountDownLatch, List>> mCreateAddReaders = + (errors, results, readerRunningLatch, writerCompletedLatch) -> { + return submitAddInodeJob( + errors, results, readerRunningLatch, writerCompletedLatch); + }; + private QuadFunction, ArrayBlockingQueue, + CountDownLatch, CountDownLatch, List>> mCreateGetReaders = + (errors, results, readerRunningLatch, writerCompletedLatch) -> { + return submitGetInodeJob( + errors, results, readerRunningLatch, writerCompletedLatch); + }; + private QuadFunction, ArrayBlockingQueue, + CountDownLatch, CountDownLatch, List>> mCreateListReadersAbort = + (errors, results, readerRunningLatch, writerCompletedLatch) -> { + // Do not wait for the writer latch, writer will run concurrent to the list actions + return submitListingJob( + errors, results, readerRunningLatch, null); + }; + + @Before + public void setUp() throws Exception { + Configuration.set(PropertyKey.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT, "500ms"); + Configuration.set(PropertyKey.TEST_MODE, true); + // Wait for a shorter period of time in test + Configuration.set(PropertyKey.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT, "1s"); + mPath = mFolder.newFolder().getAbsolutePath(); + mStore = new RocksInodeStore(mFolder.newFolder().getAbsolutePath()); + mThreadPool = Executors.newCachedThreadPool( + ThreadFactoryUtils.build("test-executor-%d", true)); + } + + @After + public void tearDown() throws Exception { + mStore.close(); + mThreadPool.shutdownNow(); + mThreadPool = null; + } + @Test public void batchWrite() throws IOException { - RocksInodeStore store = new RocksInodeStore(mFolder.newFolder().getAbsolutePath()); - WriteBatch batch = store.createWriteBatch(); + WriteBatch batch = mStore.createWriteBatch(); for (int i = 1; i < 20; i++) { batch.writeInode( MutableInodeDirectory.create(i, 0, "dir" + i, CreateDirectoryContext.defaults())); } batch.commit(); for (int i = 1; i < 20; i++) { - assertEquals("dir" + i, store.get(i).get().getName()); + assertEquals("dir" + i, mStore.get(i).get().getName()); } } @Test public void toStringEntries() throws IOException { - RocksInodeStore store = new RocksInodeStore(mFolder.newFolder().getAbsolutePath()); - assertEquals("", store.toStringEntries()); + assertEquals("", mStore.toStringEntries()); + + mStore.writeInode(MutableInodeDirectory.create( + 1, 0, "dir", CreateDirectoryContext.defaults())); + assertEquals("dir", mStore.get(1).get().getName()); + assertThat(mStore.toStringEntries(), containsString("name=dir")); + } + + @Test + public void concurrentListAndClose() throws Exception { + testConcurrentReaderAndClose(mCreateListReadersAbort); + } + + @Test + public void concurrentListAndRestore() throws Exception { + testConcurrentReaderAndRestore(mCreateListReadersAbort, (errors, results) -> { + assertTrue(errors.size() <= THREAD_NUMBER); + // Depending on the thread execution order, sometimes the reader threads + // may run to 
finish before the writer thread picks up the signal and flag + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed + errors.size()); + return null; + }, (errors, results) -> { + // Results are all empty after the clear + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentListAndCheckpoint() throws Exception { + testConcurrentReaderAndCheckpoint(mCreateListReadersAbort, (errors, results) -> { + assertTrue(errors.size() <= THREAD_NUMBER); + // Depending on the thread execution order, sometimes the reader threads + // may run to finish before the writer thread picks up the signal and flag + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed + errors.size()); + return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentListAndClear() throws Exception { + testConcurrentReaderAndClear(mCreateListReadersAbort, (errors, results) -> { + assertTrue(errors.size() <= THREAD_NUMBER); + // Depending on the thread execution order, sometimes the reader threads + // may run to finish before the writer thread picks up the signal and flag + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed + errors.size()); + return null; + }, (errors, results) -> { + // Results are all empty after the clear + assertEquals(0, errors.size()); + long seeEmpty = results.stream().filter(n -> n == 0).count(); + assertEquals(THREAD_NUMBER, seeEmpty); + return null; + }); + } + + @Test + public void concurrentGetAndClose() throws Exception { + testConcurrentReaderAndClose(mCreateGetReaders); + } + + @Test + public void concurrentGetAndRestore() throws Exception { + testConcurrentReaderAndRestore(mCreateGetReaders, (errors, results) -> { + // The closer will finish and the new Get operations are unaffected + // If one inode does not exist, result will be Optional.empty + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentGetAndCheckpoint() throws Exception { + testConcurrentReaderAndCheckpoint(mCreateGetReaders, (errors, results) -> { + // The closer will finish and the new Get operations are unaffected + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentGetAndClear() throws Exception { + testConcurrentReaderAndClear(mCreateGetReaders, (errors, results) -> { + // The closer will finish and the new Get operations are unaffected + // However, Get after the RocksDB is cleared will get empty results + 
assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentAddAndClose() throws Exception { + testConcurrentReaderAndClose(mCreateAddReaders); + } + + @Test + public void concurrentAddAndRestore() throws Exception { + testConcurrentReaderAndRestore(mCreateAddReaders, (errors, results) -> { + // After the restore finishes, new add operations can go on unaffected + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentAddAndCheckpoint() throws Exception { + testConcurrentReaderAndCheckpoint(mCreateAddReaders, (errors, results) -> { + // After the checkpoint finishes, add operations can go on unaffected + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentAddAndClear() throws Exception { + testConcurrentReaderAndClear(mCreateAddReaders, (errors, results) -> { + // After the clear finishes, add operations can go on unaffected + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + private List<Future<Void>> submitListingJob( + ArrayBlockingQueue<Throwable> errors, + ArrayBlockingQueue<Integer> results, + @Nullable CountDownLatch readersRunningLatch, + @Nullable CountDownLatch writerCompletedLatch) { + List<Future<Void>> futures = new ArrayList<>(); + for (int k = 0; k < THREAD_NUMBER; k++) { + futures.add(mThreadPool.submit(() -> { + int listedCount = 0; + try (CloseableIterator<Long> iter = mStore.getChildIds(0L)) { + while (iter.hasNext()) { + if (listedCount == 10 && readersRunningLatch != null) { + readersRunningLatch.countDown(); + if (writerCompletedLatch != null) { + // Pretend the reader is blocked and will wake up after the writer is done + writerCompletedLatch.await(); + } + } + iter.next(); + listedCount++; + } + } catch (Exception e) { + errors.add(e); + } finally { + results.add(listedCount); + } + return null; + })); + } + return futures; + } + + private List<Future<Void>> submitIterJob(int threadCount, + ArrayBlockingQueue<Throwable> errors, + ArrayBlockingQueue<Integer> results, + @Nullable CountDownLatch readersRunningLatch, + @Nullable CountDownLatch writerCompletedLatch) { + List<Future<Void>> futures = new ArrayList<>(); + for (int k = 0; k < threadCount; k++) { + futures.add(mThreadPool.submit(() -> { + int listedCount = 0; + try (CloseableIterator<InodeView> iter = mStore.getCloseableIterator()) { + while (iter.hasNext()) { + if
(listedCount == 10 && readersRunningLatch != null) { + readersRunningLatch.countDown(); + if (writerCompletedLatch != null) { + // Pretend the reader is blocked and will wake up after the writer is done + writerCompletedLatch.await(); + } + } + iter.next(); + listedCount++; + } + } catch (Exception e) { + errors.add(e); + } finally { + results.add(listedCount); + } + return null; + })); + } + return futures; + } + + @Test + public void escapingIteratorExceptionInNext() throws Exception { + prepareFiles(FILE_NUMBER); + + FlakyRocksInodeStore delegateStore = new FlakyRocksInodeStore(mPath, mStore); + AtomicReference exception = new AtomicReference<>(null); + try (CloseableIterator brokenIter = + delegateStore.getCloseableIterator(false, true)) { + while (brokenIter.hasNext()) { + brokenIter.next(); + } + } catch (Exception e) { + exception.set(e); + } + assertNotNull(exception.get()); + + // Even if the iter is flaky, the lock and ref count are managed correctly + // A close action will look at the ref count and err if there is a lock leak + assertEquals(0, mStore.getRocksStore().getSharedLockCount()); + mStore.close(); + } + + @Test + public void escapingIteratorExceptionInHasNext() throws Exception { + prepareFiles(FILE_NUMBER); + + FlakyRocksInodeStore delegateStore = new FlakyRocksInodeStore(mPath, mStore); + AtomicReference exception = new AtomicReference<>(null); + try (CloseableIterator brokenIter = + delegateStore.getCloseableIterator(true, false)) { + while (brokenIter.hasNext()) { + brokenIter.next(); + } + } catch (Exception e) { + exception.set(e); + } + assertNotNull(exception.get()); + + // Even if the iter is flaky, the lock and ref count are managed correctly + // A close action will look at the ref count and err if there is a lock leak + assertEquals(0, mStore.getRocksStore().getSharedLockCount()); + mStore.close(); + } + + @Test + public void longRunningIterAndRestore() throws Exception { + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. 
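The two escaping-iterator tests above pin down an invariant: even when hasNext() or next() throws, the shared lock behind the iterator must still be released, otherwise the later close() would detect a leaked ref count. A minimal sketch of that release-on-failure pattern (hypothetical LockedIterator and refCount, not the Alluxio classes):

```java
import java.util.Iterator;
import java.util.concurrent.atomic.AtomicInteger;

// Sketch: decrement the shared-lock ref count exactly once, even on exceptions.
class LockedIterator<T> implements Iterator<T>, AutoCloseable {
  private final Iterator<T> mDelegate;
  private final AtomicInteger mRefCount;
  private boolean mClosed = false;

  LockedIterator(Iterator<T> delegate, AtomicInteger refCount) {
    mDelegate = delegate;
    mRefCount = refCount;
    mRefCount.incrementAndGet();
  }

  @Override
  public boolean hasNext() {
    try {
      return mDelegate.hasNext();
    } catch (RuntimeException e) {
      close(); // do not leak the lock when the underlying iterator fails
      throw e;
    }
  }

  @Override
  public T next() {
    try {
      return mDelegate.next();
    } catch (RuntimeException e) {
      close();
      throw e;
    }
  }

  @Override
  public void close() {
    if (!mClosed) {
      mClosed = true;
      mRefCount.decrementAndGet();
    }
  }
}
```

The assertions on getSharedLockCount() afterwards verify that the count did drop back to zero despite the mid-iteration exception.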
+ Configuration.set(PropertyKey.TEST_MODE, false); + prepareFiles(FILE_NUMBER); + + // Prepare a checkpoint file + File checkpointFile = File.createTempFile("checkpoint-for-recovery", ""); + try (BufferedOutputStream out = + new BufferedOutputStream(new FileOutputStream(checkpointFile))) { + mStore.writeToCheckpoint(out); + } + + // Create a bunch of long running iterators on the InodeStore + CountDownLatch readerLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch restoreLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + submitIterJob(THREAD_NUMBER, errors, results, readerLatch, restoreLatch); + + // Await for the 20 threads to be iterating in the middle, then trigger the shutdown event + readerLatch.await(); + try (CheckpointInputStream in = new CheckpointInputStream( + (new DataInputStream(new FileInputStream(checkpointFile))))) { + mStore.restoreFromCheckpoint(in); + } + + // Verify that the iterators can still run + restoreLatch.countDown(); + waitForReaders(futures); + + // All iterators should abort because the RocksDB contents have changed + assertEquals(THREAD_NUMBER, errors.size()); + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(0, completed); + long aborted = results.stream().filter(n -> n == 10).count(); + assertEquals(THREAD_NUMBER, aborted); + } + + @Test + public void longRunningIterAndCheckpoint() throws Exception { + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. + Configuration.set(PropertyKey.TEST_MODE, false); + prepareFiles(FILE_NUMBER); + + // Create a bunch of long running iterators on the InodeStore + CountDownLatch readerLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch restoreLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + submitIterJob(THREAD_NUMBER, errors, results, readerLatch, restoreLatch); + + // Await for the 20 threads to be iterating in the middle, then trigger the shutdown event + readerLatch.await(); + File checkpointFile = File.createTempFile("checkpoint-for-recovery", ""); + try (BufferedOutputStream out = + new BufferedOutputStream(new FileOutputStream(checkpointFile))) { + mStore.writeToCheckpoint(out); + } + assertTrue(Files.size(checkpointFile.toPath()) > 0); + + // Verify that the iterators can still run + restoreLatch.countDown(); + waitForReaders(futures); + + // All iterators should abort because the RocksDB contents have changed + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + } + + public static class FlakyRocksInodeStore extends RocksInodeStore { + private final RocksInodeStore mDelegate; + + public FlakyRocksInodeStore(String baseDir, RocksInodeStore delegate) { + super(baseDir); + mDelegate = delegate; + } + + public CloseableIterator getCloseableIterator( + boolean hasNextIsFlaky, boolean nextIsFlaky) { + CloseableIterator iter = mDelegate.getCloseableIterator(); + + // This iterator is flaky + return new CloseableIterator(iter) { + private int mCounter = 0; + + @Override + public void closeResource() { + iter.closeResource(); + } + + @Override + public boolean hasNext() { + if (mCounter == 5 && hasNextIsFlaky) { + throw new 
RuntimeException("Unexpected exception in iterator"); + } + return iter.hasNext(); + } + + @Override + public InodeView next() { + mCounter++; + if (mCounter == 5 && nextIsFlaky) { + throw new RuntimeException("Unexpected exception in iterator"); + } + return iter.next(); + } + }; + } + } + + private List> submitGetInodeJob( + ArrayBlockingQueue errors, + ArrayBlockingQueue results, + @Nullable CountDownLatch readersRunningLatch, + @Nullable CountDownLatch writerCompletedLatch) { + List> futures = new ArrayList<>(); + for (int k = 0; k < THREAD_NUMBER; k++) { + final int iterNum = k; + futures.add(mThreadPool.submit(() -> { + int finishedCount = 0; + try { + for (int x = 0; x < THREAD_NUMBER; x++) { + long targetInodeId = iterNum * THREAD_NUMBER + x; + Optional> dir = mStore.getMutable(targetInodeId, ReadOption.defaults()); + finishedCount++; + if (x == 10 && readersRunningLatch != null) { + readersRunningLatch.countDown(); + if (writerCompletedLatch != null) { + // Pretend the reader is blocked and will wake up after the writer is done + writerCompletedLatch.await(); + } + } + } + } catch (Exception e) { + e.printStackTrace(); + errors.add(e); + } finally { + results.add(finishedCount); + } + return null; + })); + } + return futures; + } + + private List> submitAddInodeJob( + ArrayBlockingQueue errors, + ArrayBlockingQueue results, + @Nullable CountDownLatch readersRunningLatch, + @Nullable CountDownLatch writerCompletedLatch) { + List> futures = new ArrayList<>(); + for (int k = 0; k < THREAD_NUMBER; k++) { + final int iterNum = k; + futures.add(mThreadPool.submit(() -> { + int finishedCount = 0; + try { + for (int x = 0; x < THREAD_NUMBER; x++) { + long targetInodeId = iterNum * THREAD_NUMBER + x; + MutableInodeDirectory dir = + MutableInodeDirectory.create(targetInodeId, 0, "dir" + targetInodeId, + CreateDirectoryContext.defaults()); + mStore.addChild(0L, dir); + if (x == 10 && readersRunningLatch != null) { + readersRunningLatch.countDown(); + if (writerCompletedLatch != null) { + // Pretend the reader is blocked and will wake up after the writer is done + writerCompletedLatch.await(); + } + } + finishedCount++; + } + } catch (Exception e) { + errors.add(e); + } finally { + results.add(finishedCount); + } + return null; + })); + } + return futures; + } + + private void testConcurrentReaderAndClose( + QuadFunction, ArrayBlockingQueue, CountDownLatch, + CountDownLatch, List>> reader) throws Exception { + prepareFiles(FILE_NUMBER); + + CountDownLatch readerRunningLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch writerCompletedLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + reader.apply(errors, results, readerRunningLatch, writerCompletedLatch); + + // Await for the threads to be running in the middle, then trigger the closer event + readerRunningLatch.await(); + mStore.close(); + writerCompletedLatch.countDown(); + + waitForReaders(futures); + // Reaching here means close() was successfully, which implies ref count reached zero + assertTrue(errors.size() <= THREAD_NUMBER); + } + + private void testConcurrentReaderAndCheckpoint( + QuadFunction, ArrayBlockingQueue, CountDownLatch, + CountDownLatch, List>> reader, + BiFunction, ArrayBlockingQueue, + Void> stateCheckAfterReadersFinish, + BiFunction, ArrayBlockingQueue, + Void> stateCheckAfterReadersFinishAgain + ) throws Exception { + prepareFiles(FILE_NUMBER); + + CountDownLatch 
readerRunningLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch writerCompletedLatch = new CountDownLatch(1); + ArrayBlockingQueue<Throwable> errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue<Integer> results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List<Future<Void>> futures = + reader.apply(errors, results, readerRunningLatch, writerCompletedLatch); + + // Wait for the 20 reader threads to be iterating in the middle, then trigger the checkpoint + readerRunningLatch.await(); + File checkpointFile = File.createTempFile("checkpoint-file", ""); + try (BufferedOutputStream out = + new BufferedOutputStream(new FileOutputStream(checkpointFile))) { + mStore.writeToCheckpoint(out); + } + assertTrue(Files.size(checkpointFile.toPath()) > 0); + writerCompletedLatch.countDown(); + + waitForReaders(futures); + stateCheckAfterReadersFinish.apply(errors, results); + + // Verify that the RocksDB can still serve + ArrayBlockingQueue<Throwable> errorsAgain = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue<Integer> resultsAgain = new ArrayBlockingQueue<>(THREAD_NUMBER); + List<Future<Void>> futuresAgain = reader.apply(errorsAgain, resultsAgain, null, null); + waitForReaders(futuresAgain); + stateCheckAfterReadersFinishAgain.apply(errorsAgain, resultsAgain); + } + + private void testConcurrentReaderAndRestore( + QuadFunction<ArrayBlockingQueue<Throwable>, ArrayBlockingQueue<Integer>, + CountDownLatch, CountDownLatch, List<Future<Void>>> reader, + BiFunction<ArrayBlockingQueue<Throwable>, ArrayBlockingQueue<Integer>, + Void> stateCheckAfterReadersFinish, + BiFunction<ArrayBlockingQueue<Throwable>, ArrayBlockingQueue<Integer>, + Void> stateCheckAfterReadersFinishAgain + ) throws Exception { + prepareFiles(FILE_NUMBER); + // Prepare a checkpoint file + File checkpointFile = File.createTempFile("checkpoint-for-recovery", ""); + try (BufferedOutputStream out = + new BufferedOutputStream(new FileOutputStream(checkpointFile))) { + mStore.writeToCheckpoint(out); + } + + CountDownLatch readerRunningLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch writerCompletedLatch = new CountDownLatch(1); + ArrayBlockingQueue<Throwable> errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue<Integer> results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List<Future<Void>> futures = + reader.apply(errors, results, readerRunningLatch, writerCompletedLatch); + + // Wait for the 20 reader threads to be iterating in the middle, then trigger the restore + readerRunningLatch.await(); + try (CheckpointInputStream in = new CheckpointInputStream( + (new DataInputStream(new FileInputStream(checkpointFile))))) { + mStore.restoreFromCheckpoint(in); + } + writerCompletedLatch.countDown(); + waitForReaders(futures); + stateCheckAfterReadersFinish.apply(errors, results); + + // Verify that the RocksDB can still serve + ArrayBlockingQueue<Throwable> errorsAgain = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue<Integer> resultsAgain = new ArrayBlockingQueue<>(THREAD_NUMBER); + List<Future<Void>> futuresAgain = reader.apply(errorsAgain, resultsAgain, null, null); + waitForReaders(futuresAgain); + stateCheckAfterReadersFinishAgain.apply(errorsAgain, resultsAgain); + } + + private void testConcurrentReaderAndClear( + QuadFunction<ArrayBlockingQueue<Throwable>, ArrayBlockingQueue<Integer>, + CountDownLatch, CountDownLatch, List<Future<Void>>> reader, + BiFunction<ArrayBlockingQueue<Throwable>, ArrayBlockingQueue<Integer>, + Void> stateCheckAfterReadersFinish, + BiFunction<ArrayBlockingQueue<Throwable>, ArrayBlockingQueue<Integer>, + Void> stateCheckAfterReadersFinishAgain + ) throws Exception { + prepareFiles(FILE_NUMBER); + + CountDownLatch readerRunningLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch writerCompletedLatch = new CountDownLatch(1); + ArrayBlockingQueue<Throwable> errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue<Integer>
results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List<Future<Void>> futures = + reader.apply(errors, results, readerRunningLatch, writerCompletedLatch); + + // Wait for the 20 reader threads to be iterating in the middle, then trigger the clear + readerRunningLatch.await(); + mStore.clear(); + writerCompletedLatch.countDown(); + + waitForReaders(futures); + stateCheckAfterReadersFinish.apply(errors, results); + + // Verify that the RocksDB can still serve + ArrayBlockingQueue<Throwable> errorsAgain = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue<Integer> resultsAgain = new ArrayBlockingQueue<>(THREAD_NUMBER); + List<Future<Void>> futuresAgain = reader.apply(errorsAgain, resultsAgain, null, null); + waitForReaders(futuresAgain); + stateCheckAfterReadersFinishAgain.apply(errorsAgain, resultsAgain); + } + + private void prepareFiles(int fileCount) throws Exception { + for (int i = 1; i < fileCount + 1; i++) { + MutableInodeDirectory dir = MutableInodeDirectory.create(i, 0, "dir" + i, + CreateDirectoryContext.defaults()); + mStore.addChild(0, dir); + mStore.writeInode(dir); + } + } - store.writeInode(MutableInodeDirectory.create(1, 0, "dir", CreateDirectoryContext.defaults())); - assertEquals("dir", store.get(1).get().getName()); - assertThat(store.toStringEntries(), containsString("name=dir")); + @FunctionalInterface + interface QuadFunction<A, B, C, D, R> { + R apply(A a, B b, C c, D d); } } diff --git a/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTest.java b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTest.java index e89cf874e7a2..3ce80ed00581 100644 --- a/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTest.java +++ b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTest.java @@ -12,10 +12,21 @@ package alluxio.master.metastore.rocks; import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.ExceptionMessage; +import alluxio.exception.runtime.UnavailableRuntimeException; import alluxio.master.journal.checkpoint.CheckpointInputStream; +import alluxio.util.ThreadFactoryUtils; import com.google.common.primitives.Longs; +import org.junit.After; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -26,60 +37,484 @@ import org.rocksdb.DBOptions; import org.rocksdb.HashLinkedListMemTableConfig; import org.rocksdb.RocksDB; +import org.rocksdb.RocksObject; import org.rocksdb.WriteOptions; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicReference; public class RocksStoreTest { @Rule public TemporaryFolder mFolder = new TemporaryFolder(); - @Test - public void backupRestore() throws Exception { + private RocksStore mStore; + List<RocksObject> mToClose; + AtomicReference<ColumnFamilyHandle> mTestColumn; + String mDbDir; + String mBackupsDir; + List<ColumnFamilyDescriptor> mColumnDescriptors; + ExecutorService mThreadPool; + + @Before + public void setup() throws Exception { +
Configuration.set(PropertyKey.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT, "500ms"); + Configuration.set(PropertyKey.TEST_MODE, true); + + mToClose = new ArrayList<>(); ColumnFamilyOptions cfOpts = new ColumnFamilyOptions() .setMemTableConfig(new HashLinkedListMemTableConfig()) .setCompressionType(CompressionType.NO_COMPRESSION) .useFixedLengthPrefixExtractor(Longs.BYTES); // We always search using the initial long key + mToClose.add(cfOpts); - List columnDescriptors = + mColumnDescriptors = Arrays.asList(new ColumnFamilyDescriptor("test".getBytes(), cfOpts)); - String dbDir = mFolder.newFolder("rocks").getAbsolutePath(); - String backupsDir = mFolder.newFolder("rocks-backups").getAbsolutePath(); - AtomicReference testColumn = new AtomicReference<>(); + mDbDir = mFolder.newFolder("rocks").getAbsolutePath(); + mBackupsDir = mFolder.newFolder("rocks-backups").getAbsolutePath(); + mTestColumn = new AtomicReference<>(); DBOptions dbOpts = new DBOptions().setCreateIfMissing(true) .setCreateMissingColumnFamilies(true) .setAllowConcurrentMemtableWrite(false); - RocksStore store = - new RocksStore("test", dbDir, backupsDir, dbOpts, columnDescriptors, - Arrays.asList(testColumn)); + mToClose.add(dbOpts); + + mStore = new RocksStore("test", mDbDir, mBackupsDir, dbOpts, mColumnDescriptors, + Arrays.asList(mTestColumn)); + + mThreadPool = Executors.newCachedThreadPool( + ThreadFactoryUtils.build("test-executor-%d", true)); + } + + @After + public void tearDown() throws Exception { + try (RocksExclusiveLockHandle lock = mStore.lockForClosing()) { + mStore.close(); + } + + Collections.reverse(mToClose); + mToClose.forEach(RocksObject::close); + + mThreadPool.shutdownNow(); + } + + @Test + public void backupRestore() throws Exception { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - RocksDB db = store.getDb(); + RocksDB db; int count = 10; - for (int i = 0; i < count; i++) { - db.put(testColumn.get(), new WriteOptions().setDisableWAL(true), ("a" + i).getBytes(), - "b".getBytes()); + try (RocksSharedLockHandle lock = mStore.checkAndAcquireSharedLock()) { + db = mStore.getDb(); + for (int i = 0; i < count; i++) { + db.put(mTestColumn.get(), new WriteOptions().setDisableWAL(true), ("a" + i).getBytes(), + "b".getBytes()); + } + } + try (RocksExclusiveLockHandle lock = mStore.lockForCheckpoint()) { + mStore.writeToCheckpoint(baos); + } + try (RocksExclusiveLockHandle lock = mStore.lockForClosing()) { + mStore.close(); } - store.writeToCheckpoint(baos); - store.close(); - String newBbDir = mFolder.newFolder("rocks-new").getAbsolutePath(); - dbOpts = new DBOptions().setCreateIfMissing(true) + String newDbDir = mFolder.newFolder("rocks-new").getAbsolutePath(); + DBOptions dbOpts = new DBOptions().setCreateIfMissing(true) .setCreateMissingColumnFamilies(true) .setAllowConcurrentMemtableWrite(false); - store = - new RocksStore("test-new", newBbDir, backupsDir, dbOpts, columnDescriptors, - Arrays.asList(testColumn)); - store.restoreFromCheckpoint( - new CheckpointInputStream(new ByteArrayInputStream(baos.toByteArray()))); - db = store.getDb(); - for (int i = 0; i < count; i++) { - assertArrayEquals("b".getBytes(), db.get(testColumn.get(), ("a" + i).getBytes())); + mToClose.add(dbOpts); + mStore = + new RocksStore("test-new", newDbDir, mBackupsDir, dbOpts, mColumnDescriptors, + Arrays.asList(mTestColumn)); + try (RocksExclusiveLockHandle lock = mStore.lockForRewrite()) { + mStore.restoreFromCheckpoint( + new CheckpointInputStream(new ByteArrayInputStream(baos.toByteArray()))); + } + try 
(RocksSharedLockHandle lock = mStore.checkAndAcquireSharedLock()) { + db = mStore.getDb(); + for (int i = 0; i < count; i++) { + assertArrayEquals("b".getBytes(), db.get(mTestColumn.get(), ("a" + i).getBytes())); + } + } + } + + @Test + public void sharedLockRefCount() { + List readLocks = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + assertEquals(i, mStore.getSharedLockCount()); + RocksSharedLockHandle lockHandle = mStore.checkAndAcquireSharedLock(); + readLocks.add(lockHandle); + } + assertEquals(20, mStore.getSharedLockCount()); + + for (int i = 0; i < 20; i++) { + assertEquals(20 - i, mStore.getSharedLockCount()); + readLocks.get(i).close(); } - store.close(); - cfOpts.close(); + assertEquals(0, mStore.getSharedLockCount()); + } + + @Test + public void exclusiveLockOnClosing() { + RocksExclusiveLockHandle exclusiveLock = mStore.lockForClosing(); + + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.checkAndAcquireSharedLock(); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + Exception f = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.shouldAbort(0); + }); + assertTrue(f.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + exclusiveLock.close(); + assertEquals(0, mStore.getSharedLockCount()); + // The flag is NOT reset after the lock is released, because the service will exit + assertTrue(mStore.isServiceStopping()); + } + + @Test + public void exclusiveLockOnCheckpoint() { + RocksExclusiveLockHandle exclusiveLock = mStore.lockForCheckpoint(); + + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.checkAndAcquireSharedLock(); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + Exception f = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.shouldAbort(0); + }); + assertTrue(f.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + exclusiveLock.close(); + assertEquals(0, mStore.getSharedLockCount()); + // The flag is reset after the lock is released, because the service will restore + assertFalse(mStore.isServiceStopping()); + } + + @Test + public void exclusiveLockOnRewrite() { + RocksExclusiveLockHandle exclusiveLock = mStore.lockForRewrite(); + + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.checkAndAcquireSharedLock(); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + Exception f = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.shouldAbort(0); + }); + assertTrue(f.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + exclusiveLock.close(); + assertEquals(0, mStore.getSharedLockCount()); + // The flag is reset after the lock is released, because the service will restore + assertFalse(mStore.isServiceStopping()); + } + + @Test + public void exclusiveLockForcedAndReleasedAfterSharedLock() throws Exception { + // One reader gets the shared lock and does not release for a long time + CountDownLatch readerCloseLatch = new CountDownLatch(1); + CountDownLatch writerStartLatch = new CountDownLatch(1); + Future f = mThreadPool.submit(() -> { + RocksSharedLockHandle lockHandle = 
mStore.checkAndAcquireSharedLock(); + System.out.println("Read lock grabbed"); + writerStartLatch.countDown(); + assertEquals(1, mStore.getSharedLockCount()); + try { + readerCloseLatch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + System.out.println("Able to unlock read lock now"); + // After a long time, this lock is released after the exclusive lock has been forced + lockHandle.close(); + System.out.println("Read lock released"); + // The lock release should not mess up the ref count + assertEquals(0, mStore.getSharedLockCount()); + return null; + }); + + // One closer comes in and eventually will grab the lock after wait + writerStartLatch.await(); + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. + Configuration.set(PropertyKey.TEST_MODE, false); + RocksExclusiveLockHandle exclusiveLock = mStore.lockForCheckpoint(); + // After some wait, the closer will force the lock and reset the ref count + // And the ref count will be reset on that force + assertEquals(0, mStore.getSharedLockCount()); + // Let the reader finish before the exclusive lock is released + readerCloseLatch.countDown(); + f.get(); + // That should not mess up the ref count + assertEquals(0, mStore.getSharedLockCount()); + exclusiveLock.close(); + assertEquals(0, mStore.getSharedLockCount()); + } + + @Test + public void exclusiveLockForcedAndReleasedBeforeSharedLock() throws Exception { + // One reader gets the shared lock and does not release for a long time + CountDownLatch readerCloseLatch = new CountDownLatch(1); + CountDownLatch writerStartLatch = new CountDownLatch(1); + Future f = mThreadPool.submit(() -> { + RocksSharedLockHandle lockHandle = mStore.checkAndAcquireSharedLock(); + System.out.println("Read lock grabbed"); + writerStartLatch.countDown(); + assertEquals(1, mStore.getSharedLockCount()); + try { + readerCloseLatch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + System.out.println("Able to unlock read lock now"); + // After a long time, this lock is released after the exclusive lock has been forced + lockHandle.close(); + System.out.println("Read lock released"); + // The lock release should not mess up the ref count + assertEquals(0, mStore.getSharedLockCount()); + return null; + }); + + // One closer comes in and eventually will grab the lock after wait + writerStartLatch.await(); + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. 
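These forced-lock tests exercise three rules at once: a writer that times out waiting for readers forcibly resets the shared-lock count; a version stamp lets a reader that slept through the force detect whether the contents were rewritten; and in test mode the force throws instead, so ref-count leaks surface in CI. A rough, self-contained model of those rules (hypothetical VersionedLock, not the RocksStore implementation):

```java
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

// Hypothetical model of the versioned shared/exclusive locking, for illustration only.
class VersionedLock {
  private final AtomicInteger mSharedCount = new AtomicInteger();
  private final AtomicLong mVersion = new AtomicLong();

  long acquireShared() {
    mSharedCount.incrementAndGet();
    return mVersion.get(); // the reader remembers the version it started on
  }

  void releaseShared() {
    mSharedCount.decrementAndGet();
  }

  // Writer path after the exclusive-lock timeout expires. Checkpoints leave the
  // contents intact, so the version stays; rewrites (restore/clear) bump it.
  void forceExclusive(boolean rewritesContents, boolean testMode) {
    if (testMode) {
      throw new RuntimeException("forced the lock with " + mSharedCount.get()
          + " shared locks outstanding - likely a ref-count leak");
    }
    mSharedCount.set(0); // the ref count is reset on the force
    if (rewritesContents) {
      mVersion.incrementAndGet();
    }
  }

  void shouldAbort(long readerVersion) {
    if (readerVersion < mVersion.get()) {
      throw new IllegalStateException("contents were rewritten; the reader must abort");
    }
  }
}
```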
+ Configuration.set(PropertyKey.TEST_MODE, false); + RocksExclusiveLockHandle exclusiveLock = mStore.lockForCheckpoint(); + // After some wait, the closer will force the lock and reset the ref count + // And the ref count will be reset on that force + assertEquals(0, mStore.getSharedLockCount()); + // The exclusive lock releases before the reader even wakes up + exclusiveLock.close(); + // Let the reader finish + readerCloseLatch.countDown(); + f.get(); + // The ref count is not messed up + assertEquals(0, mStore.getSharedLockCount()); + } + + @Test + public void forcingExclusiveLockInTestWillErr() throws Exception { + // One reader gets the shared lock and does not release for a long time + CountDownLatch readerCloseLatch = new CountDownLatch(1); + CountDownLatch writerStartLatch = new CountDownLatch(1); + Future f = mThreadPool.submit(() -> { + RocksSharedLockHandle lockHandle = mStore.checkAndAcquireSharedLock(); + System.out.println("Read lock grabbed"); + writerStartLatch.countDown(); + assertEquals(1, mStore.getSharedLockCount()); + try { + readerCloseLatch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + System.out.println("Able to unlock read lock now"); + // After a long time, this lock is released after the exclusive lock has been forced + lockHandle.close(); + System.out.println("Read lock released"); + // The lock release should not mess up the ref count + assertEquals(0, mStore.getSharedLockCount()); + return null; + }); + + // One closer comes in and eventually will grab the lock after wait + writerStartLatch.await(); + // In test mode, forcing the exclusive lock will result in an exception + // This will help us detect issues with the ref count + assertThrows(RuntimeException.class, () -> { + RocksExclusiveLockHandle exclusiveLock = mStore.lockForCheckpoint(); + }); + // Let the reader finish + readerCloseLatch.countDown(); + f.get(); + // Even if the exclusive lock attempt failed, the ref count will be correct + assertEquals(0, mStore.getSharedLockCount()); + } + + @Test + public void readerCanContinueAfterCheckpoint() throws Exception { + // One reader gets the shared lock and does not release for a long time + CountDownLatch readerCloseLatch = new CountDownLatch(1); + CountDownLatch writerStartLatch = new CountDownLatch(1); + Future f = mThreadPool.submit(() -> { + RocksSharedLockHandle lockHandle = mStore.checkAndAcquireSharedLock(); + System.out.println("Read lock grabbed"); + writerStartLatch.countDown(); + try { + readerCloseLatch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + // While this reader is sleeping, one checkpoint is completed in the background + // This check should pass without throwing an exception + // And that means the reader can continue doing what it was doing + mStore.shouldAbort(lockHandle.getLockVersion()); + + System.out.println("Able to continue reading"); + // After finishing its work, this lock is released + lockHandle.close(); + System.out.println("Read lock released"); + // The lock release has passed due but should not mess up the ref count + assertEquals(0, mStore.getSharedLockCount()); + return null; + }); + + // One closer comes in and eventually will grab the lock after wait + writerStartLatch.await(); + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. 
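Every reader/writer test in these two classes follows the same two-latch choreography: readers count down once they are mid-operation, the writer acts, and the readers then wake up and observe the result. A runnable distillation of that structure (hypothetical names; thread and read counts are illustrative):

```java
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class LatchChoreography {
  public static void main(String[] args) throws Exception {
    final int readers = 4;
    CountDownLatch readersRunning = new CountDownLatch(readers);
    CountDownLatch writerDone = new CountDownLatch(1);
    ExecutorService pool = Executors.newCachedThreadPool();
    for (int i = 0; i < readers; i++) {
      pool.submit(() -> {
        // ... perform some reads, then signal "I am in the middle of my work"
        readersRunning.countDown();
        writerDone.await(); // pretend to be blocked until the writer is done
        // ... finish reading and record results or errors
        return null;
      });
    }
    readersRunning.await(); // the writer waits until every reader is mid-flight
    // ... the writer closes, checkpoints, restores, or clears the store here
    writerDone.countDown(); // wake the readers so they observe the new state
    pool.shutdown();
    pool.awaitTermination(10, TimeUnit.SECONDS);
  }
}
```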
+ Configuration.set(PropertyKey.TEST_MODE, false); + RocksExclusiveLockHandle exclusiveLock = mStore.lockForCheckpoint(); + // After some wait, the closer will force the lock and reset the ref count + // And the ref count will be reset on that force + assertEquals(0, mStore.getSharedLockCount()); + // Now the checkpointing was done, while the reader is still asleep + exclusiveLock.close(); + // Let the reader wake up and continue + readerCloseLatch.countDown(); + f.get(); + assertEquals(0, mStore.getSharedLockCount()); + } + + @Test + public void readerCanNotContinueAfterRestore() throws Exception { + // One reader gets the shared lock and does not release for a long time + CountDownLatch readerCloseLatch = new CountDownLatch(1); + CountDownLatch writerStartLatch = new CountDownLatch(1); + Future f = mThreadPool.submit(() -> { + RocksSharedLockHandle lockHandle = mStore.checkAndAcquireSharedLock(); + System.out.println("Read lock grabbed"); + writerStartLatch.countDown(); + try { + readerCloseLatch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + // While this reader is sleeping, one restore action is completed in the background + // This check should throw an exception because the RocksDB contents have changed + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.shouldAbort(lockHandle.getLockVersion()); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_REWRITTEN.getMessage())); + + System.out.println("Not able to continue reading"); + // After finishing its work, this lock is released + lockHandle.close(); + System.out.println("Read lock released"); + // The lock release has passed due but should not mess up the ref count + assertEquals(0, mStore.getSharedLockCount()); + return null; + }); + + // One closer comes in and eventually will grab the lock after wait + writerStartLatch.await(); + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. 
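The contrast between readerCanContinueAfterCheckpoint above and this restore test then reduces to whether the forced writer bumped the version. In terms of the hypothetical VersionedLock sketch from earlier:

```java
public class VersionedLockDemo {
  public static void main(String[] args) {
    VersionedLock lock = new VersionedLock();

    // Checkpoint path: contents intact, the sleeping reader may continue.
    long v1 = lock.acquireShared();
    lock.forceExclusive(/* rewritesContents */ false, /* testMode */ false);
    lock.shouldAbort(v1); // passes

    // Restore path: contents rewritten, the sleeping reader must abort.
    long v2 = lock.acquireShared();
    lock.forceExclusive(/* rewritesContents */ true, /* testMode */ false);
    try {
      lock.shouldAbort(v2); // throws
    } catch (IllegalStateException expected) {
      // mirrors the assertThrows in readerCanNotContinueAfterRestore
    }
  }
}
```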
+ Configuration.set(PropertyKey.TEST_MODE, false); + RocksExclusiveLockHandle exclusiveLock = mStore.lockForRewrite(); + // After some wait, the closer will force the lock and reset the ref count + // And the ref count will be reset on that force + assertEquals(0, mStore.getSharedLockCount()); + // Now the checkpointing was done, while the reader is still asleep + exclusiveLock.close(); + // Let the reader wake up and continue + readerCloseLatch.countDown(); + f.get(); + assertEquals(0, mStore.getSharedLockCount()); + } + + @Test + public void checkpointThenClose() { + RocksExclusiveLockHandle checkpointLock = mStore.lockForCheckpoint(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + // Before the checkpoint finishes, an attempt comes in to close + // This should succeed + RocksExclusiveLockHandle closeLock = mStore.lockForClosing(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + checkpointLock.close(); + closeLock.close(); + } + + @Test + public void rewriteThenClose() { + RocksExclusiveLockHandle rewriteLock = mStore.lockForRewrite(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + // Before the checkpoint finishes, an attempt comes in to close + // This should succeed + RocksExclusiveLockHandle closeLock = mStore.lockForClosing(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + rewriteLock.close(); + closeLock.close(); + } + + @Test + public void closeThenCheckpoint() { + RocksExclusiveLockHandle closeLock = mStore.lockForClosing(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + // Closing takes higher priority and a checkpoint attempt will fail + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + RocksExclusiveLockHandle checkpointLock = mStore.lockForCheckpoint(); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + closeLock.close(); + } + + @Test + public void closeThenRewrite() { + RocksExclusiveLockHandle closeLock = mStore.lockForClosing(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + // Closing takes higher priority and a checkpoint attempt will fail + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + RocksExclusiveLockHandle rewriteLock = mStore.lockForRewrite(); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + closeLock.close(); + } + + @Test + public void checkpointThenRewrite() { + RocksExclusiveLockHandle checkpointLock = mStore.lockForCheckpoint(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + // Rewrite/Checkpoint will yield to exclusive lock + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + RocksExclusiveLockHandle rewriteLock = mStore.lockForRewrite(); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + checkpointLock.close(); } } diff --git a/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTestUtils.java 
b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTestUtils.java new file mode 100644 index 000000000000..b3f004372d74 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTestUtils.java @@ -0,0 +1,29 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore.rocks; + +import static org.junit.Assert.fail; + +import java.util.List; +import java.util.concurrent.Future; + +public class RocksStoreTestUtils { + public static void waitForReaders(List> futures) { + futures.stream().forEach(f -> { + try { + f.get(); + } catch (Exception e) { + fail("Met uncaught exception from iteration"); + } + }); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTest.java b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTest.java index babc788cea42..f043d72b1bae 100644 --- a/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTest.java +++ b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTest.java @@ -12,46 +12,25 @@ package alluxio.master.service.rpc; import alluxio.conf.Configuration; -import alluxio.grpc.GrpcServerAddress; -import alluxio.grpc.GrpcServerBuilder; +import alluxio.conf.PropertyKey; import alluxio.master.AlluxioMasterProcess; -import alluxio.master.MasterRegistry; -import alluxio.master.PortReservationRule; -import alluxio.util.CommonUtils; -import alluxio.util.WaitForOptions; import org.junit.Assert; import org.junit.Before; -import org.junit.Rule; import org.junit.Test; -import org.mockito.Mockito; - -import java.io.IOException; -import java.net.ConnectException; -import java.net.InetSocketAddress; -import java.net.Socket; -import java.util.Optional; +import org.junit.runner.RunWith; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.modules.junit4.PowerMockRunner; /** * Test for RpcSimpleService. 
*/ -public class RpcServerServiceTest { - @Rule - public PortReservationRule mPort = new PortReservationRule(); - - private final MasterRegistry mRegistry = new MasterRegistry(); - private InetSocketAddress mRpcAddress; - private AlluxioMasterProcess mMasterProcess; - +@RunWith(PowerMockRunner.class) +@PrepareForTest(AlluxioMasterProcess.class) +public class RpcServerServiceTest extends RpcServerServiceTestBase { @Before - public void setUp() { - mRpcAddress = new InetSocketAddress(mPort.getPort()); - mMasterProcess = Mockito.mock(AlluxioMasterProcess.class); - Mockito.when(mMasterProcess.createBaseRpcServer()).thenAnswer(mock -> - GrpcServerBuilder.forAddress(GrpcServerAddress.create(mRpcAddress.getHostName(), - mRpcAddress), Configuration.global())); - Mockito.when(mMasterProcess.createRpcExecutorService()).thenReturn(Optional.empty()); - Mockito.when(mMasterProcess.getSafeModeManager()).thenReturn(Optional.empty()); + public void before() { + Configuration.set(PropertyKey.STANDBY_MASTER_GRPC_ENABLED, false); } @Test @@ -64,19 +43,19 @@ public void primaryOnlyTest() { service.start(); // after start and before stop the rpc port is always bound as either the rpc server or the // rejecting server is bound to is (depending on whether it is in PRIMARY or STANDBY state) - Assert.assertTrue(isBound()); + Assert.assertTrue(isGrpcBound()); Assert.assertFalse(service.isServing()); for (int i = 0; i < 5; i++) { service.promote(); Assert.assertTrue(service.isServing()); - Assert.assertTrue(isBound()); + Assert.assertTrue(isGrpcBound()); service.demote(); - Assert.assertTrue(isBound()); + Assert.assertTrue(isGrpcBound()); Assert.assertFalse(service.isServing()); } service.stop(); Assert.assertFalse(service.isServing()); - Assert.assertFalse(isBound()); + Assert.assertFalse(isGrpcBound()); } @Test @@ -99,24 +78,4 @@ public void doubleStartRpcServer() { Assert.assertThrows("rpc server must not be running", IllegalStateException.class, service::promote); } - - private boolean isBound() { - try (Socket socket = new Socket(mRpcAddress.getAddress(), mRpcAddress.getPort())) { - return true; - } catch (ConnectException e) { - return false; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private boolean waitForFree() { - try { - CommonUtils.waitFor("wait for socket to be free", () -> !isBound(), - WaitForOptions.defaults().setTimeoutMs(1_000).setInterval(10)); - return true; - } catch (Exception e) { - return false; - } - } } diff --git a/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTestBase.java b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTestBase.java new file mode 100644 index 000000000000..6165eed72c3a --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTestBase.java @@ -0,0 +1,79 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
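A subtlety this refactor leans on: JUnit 4 runs superclass @Before methods before subclass ones, unless the subclass overrides the method itself, in which case only the override runs and it decides when to invoke the base logic. A sketch of both patterns used here (hypothetical classes, not the Alluxio tests):

```java
import org.junit.Before;

class BaseTest {
  @Before
  public void setUp() {
    // shared mock and address setup lives here
  }
}

// Pattern used by RpcServerServiceTest: BaseTest.setUp() runs first, then before().
class SubWithExtraBefore extends BaseTest {
  @Before
  public void before() {
    // runs after BaseTest.setUp(), good for tweaking state the base created
  }
}

// Pattern used by RpcServerStandbyGrpcServiceTest: the override controls ordering.
class SubWithOverride extends BaseTest {
  @Override
  @Before
  public void setUp() {
    // set configuration first, so the base setup sees it
    super.setUp();
  }
}
```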
+ */ + +package alluxio.master.service.rpc; + +import alluxio.conf.Configuration; +import alluxio.grpc.GrpcServerAddress; +import alluxio.grpc.GrpcServerBuilder; +import alluxio.master.AlluxioMasterProcess; +import alluxio.master.MasterRegistry; +import alluxio.master.PortReservationRule; +import alluxio.util.CommonUtils; +import alluxio.util.WaitForOptions; + +import org.junit.Before; +import org.junit.Rule; +import org.mockito.Mockito; +import org.powermock.api.mockito.PowerMockito; + +import java.io.IOException; +import java.net.ConnectException; +import java.net.InetSocketAddress; +import java.net.Socket; +import java.util.Optional; + +/** + * Test base RpcService related tests. + */ +public class RpcServerServiceTestBase { + @Rule + public PortReservationRule mPort = new PortReservationRule(); + + protected final MasterRegistry mRegistry = new MasterRegistry(); + protected InetSocketAddress mRpcAddress; + protected AlluxioMasterProcess mMasterProcess; + + @Before + public void setUp() { + mRpcAddress = new InetSocketAddress(mPort.getPort()); + mMasterProcess = PowerMockito.mock(AlluxioMasterProcess.class); + Mockito.when(mMasterProcess.createBaseRpcServer()).thenAnswer(mock -> + GrpcServerBuilder.forAddress(GrpcServerAddress.create(mRpcAddress.getHostName(), + mRpcAddress), Configuration.global())); + Mockito.when(mMasterProcess.createRpcExecutorService()).thenReturn(Optional.empty()); + Mockito.when(mMasterProcess.getSafeModeManager()).thenReturn(Optional.empty()); + } + + protected boolean isGrpcBound() { + return isBound(mRpcAddress); + } + + protected boolean isBound(InetSocketAddress address) { + try (Socket socket = new Socket(address.getAddress(), address.getPort())) { + return true; + } catch (ConnectException e) { + return false; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + protected boolean waitForFree() { + try { + CommonUtils.waitFor("wait for socket to be free", () -> !isGrpcBound(), + WaitForOptions.defaults().setTimeoutMs(1_000).setInterval(10)); + return true; + } catch (Exception e) { + return false; + } + } +} diff --git a/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerStandbyGrpcServiceTest.java b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerStandbyGrpcServiceTest.java new file mode 100644 index 000000000000..998a651ba359 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerStandbyGrpcServiceTest.java @@ -0,0 +1,76 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.service.rpc; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.master.AlluxioMasterProcess; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.modules.junit4.PowerMockRunner; + +/** + * Test for RpcServerStandbyGrpcServiceTest. 
+ */ +@RunWith(PowerMockRunner.class) +@PrepareForTest(AlluxioMasterProcess.class) +public class RpcServerStandbyGrpcServiceTest extends RpcServerServiceTestBase { + @Before + public void setUp() { + Configuration.reloadProperties(); + Configuration.set(PropertyKey.STANDBY_MASTER_GRPC_ENABLED, true); + super.setUp(); + } + + @Test + public void primaryOnlyTest() { + RpcServerService service = + RpcServerService.Factory.create(mRpcAddress, mMasterProcess, mRegistry); + Assert.assertTrue(waitForFree()); + + Assert.assertFalse(service.isServing()); + service.start(); + // when standby master is enabled, gRPC server is always on even if it's standby. + Assert.assertTrue(isGrpcBound()); + Assert.assertTrue(service.isServing()); + for (int i = 0; i < 5; i++) { + service.promote(); + Assert.assertTrue(service.isServing()); + Assert.assertTrue(isGrpcBound()); + service.demote(); + Assert.assertTrue(isGrpcBound()); + Assert.assertTrue(service.isServing()); + } + service.stop(); + Assert.assertFalse(service.isServing()); + Assert.assertFalse(isGrpcBound()); + } + + @Test + public void doubleStartRpcServer() { + RpcServerService service = + RpcServerService.Factory.create(mRpcAddress, mMasterProcess, mRegistry); + + service.start(); + service.promote(); + Assert.assertThrows("double promotion is not allowed", + IllegalStateException.class, service::promote); + + service.demote(); + Assert.assertThrows("double demotion is not allowed", + IllegalStateException.class, service::demote); + } +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/AlluxioProxyProcess.java b/core/server/proxy/src/main/java/alluxio/proxy/AlluxioProxyProcess.java index cb1f90f01963..217feb9befb8 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/AlluxioProxyProcess.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/AlluxioProxyProcess.java @@ -11,15 +11,23 @@ package alluxio.proxy; +import alluxio.ClientContext; import alluxio.Constants; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.grpc.NetAddress; +import alluxio.heartbeat.FixedIntervalSupplier; +import alluxio.heartbeat.HeartbeatContext; +import alluxio.heartbeat.HeartbeatThread; +import alluxio.master.MasterClientContext; import alluxio.util.CommonUtils; +import alluxio.util.ThreadFactoryUtils; import alluxio.util.WaitForOptions; import alluxio.util.network.NetworkAddressUtils; import alluxio.util.network.NetworkAddressUtils.ServiceType; import alluxio.web.ProxyWebServer; import alluxio.web.WebServer; +import alluxio.wire.Address; import org.apache.commons.io.IOUtils; import org.apache.http.HttpResponse; @@ -33,6 +41,8 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.TimeoutException; import javax.annotation.concurrent.NotThreadSafe; @@ -50,6 +60,10 @@ public final class AlluxioProxyProcess implements ProxyProcess { private final long mStartTimeMs; private final CountDownLatch mLatch; + private ProxyMasterSync mMasterSync; + + private ExecutorService mPool = Executors.newFixedThreadPool(1, + ThreadFactoryUtils.build("proxy-routine-%d", true)); /** * Creates an instance of {@link AlluxioProxy}. 
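The hunk below registers the proxy's heartbeat on a single daemon thread. As a rough model of what the HeartbeatThread/FixedIntervalSupplier pairing amounts to (simplified; the real classes also handle interruption, timing drift, and reconfiguration):

```java
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class HeartbeatModel {
  public static void main(String[] args) {
    // Daemon thread, mirroring ThreadFactoryUtils.build("proxy-routine-%d", true);
    // in the proxy the mLatch.await() in start() is what keeps the process alive.
    ScheduledExecutorService pool = Executors.newSingleThreadScheduledExecutor(r -> {
      Thread t = new Thread(r, "proxy-routine-0");
      t.setDaemon(true);
      return t;
    });
    long intervalMs = 10_000; // stand-in for PROXY_MASTER_HEARTBEAT_INTERVAL
    pool.scheduleAtFixedRate(() -> {
      // the real executor calls ProxyMasterSync.heartbeat(), which reports
      // liveness to the primary master and logs (rather than rethrows) failures
      System.out.println("proxy heartbeat");
    }, 0, intervalMs, TimeUnit.MILLISECONDS);
  }
}
```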
@@ -82,7 +96,19 @@ public void start() throws Exception { // reset proxy web port Configuration.set(PropertyKey.PROXY_WEB_PORT, mWebServer.getLocalPort()); + NetAddress proxyAddress = NetAddress.newBuilder() + .setHost(NetworkAddressUtils.getConnectHost(ServiceType.PROXY_WEB, + Configuration.global())) + .setRpcPort(mWebServer.getLocalPort()).build(); mWebServer.start(); + MasterClientContext context = MasterClientContext.newBuilder(ClientContext.create()).build(); + mMasterSync = new ProxyMasterSync( + Address.fromProto(proxyAddress), context, mStartTimeMs); + mPool.submit(new HeartbeatThread(HeartbeatContext.PROXY_META_MASTER_SYNC, mMasterSync, + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.PROXY_MASTER_HEARTBEAT_INTERVAL)), + Configuration.global(), context.getUserState())); + mLatch.await(); } @@ -92,6 +118,13 @@ public void stop() throws Exception { mWebServer.stop(); mWebServer = null; } + if (mMasterSync != null) { + mMasterSync.close(); + } + if (mPool != null) { + mPool.shutdownNow(); + mPool = null; + } mLatch.countDown(); } diff --git a/core/server/proxy/src/main/java/alluxio/proxy/ProxyMasterSync.java b/core/server/proxy/src/main/java/alluxio/proxy/ProxyMasterSync.java new file mode 100644 index 000000000000..6b63f630ed1a --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/ProxyMasterSync.java @@ -0,0 +1,69 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy; + +import alluxio.heartbeat.HeartbeatExecutor; +import alluxio.master.MasterClientContext; +import alluxio.wire.Address; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.time.Instant; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * The Proxy will maintain a stateless heartbeat with the primary master. + * This enables the admin to list all living Proxy instances in the cluster. + */ +@NotThreadSafe +public final class ProxyMasterSync implements HeartbeatExecutor { + private static final Logger LOG = LoggerFactory.getLogger(ProxyMasterSync.class); + + /** The address of this proxy. */ + private final Address mAddress; + + /** Client for communication with the primary master. */ + private final RetryHandlingMetaMasterProxyClient mMasterClient; + + /** + * Creates a new instance of {@link ProxyMasterSync}. + * + * @param address the proxy address + * @param context the communication context + * @param startTimeMs start time of this instance + */ + public ProxyMasterSync(Address address, MasterClientContext context, long startTimeMs) { + mAddress = address; + mMasterClient = new RetryHandlingMetaMasterProxyClient(mAddress, context, startTimeMs); + LOG.info("Proxy start time is {}", Instant.ofEpochMilli(startTimeMs)); + } + + /** + * Heartbeats to the primary master node. 
+ */ + @Override + public void heartbeat(long timeLimitMs) { + try { + LOG.debug("Heart beating to primary master"); + mMasterClient.proxyHeartbeat(); + } catch (IOException e) { + // Log the error but do not shut down the proxy + LOG.error("Failed to heartbeat to primary master", e); + mMasterClient.disconnect(); + } + } + + @Override + public void close() {} +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/RetryHandlingMetaMasterProxyClient.java b/core/server/proxy/src/main/java/alluxio/proxy/RetryHandlingMetaMasterProxyClient.java new file mode 100644 index 000000000000..03e9dcd846ba --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/RetryHandlingMetaMasterProxyClient.java @@ -0,0 +1,95 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy; + +import alluxio.AbstractMasterClient; +import alluxio.Constants; +import alluxio.RuntimeConstants; +import alluxio.conf.PropertyKey; +import alluxio.grpc.BuildVersion; +import alluxio.grpc.MetaMasterProxyServiceGrpc; +import alluxio.grpc.ProxyHeartbeatPOptions; +import alluxio.grpc.ProxyHeartbeatPRequest; +import alluxio.grpc.ServiceType; +import alluxio.master.MasterClientContext; +import alluxio.wire.Address; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; +import javax.annotation.concurrent.ThreadSafe; + +/** + * A wrapper for the gRPC client to interact with the meta master. + */ +@ThreadSafe +public class RetryHandlingMetaMasterProxyClient extends AbstractMasterClient { + private static final Logger RPC_LOG = + LoggerFactory.getLogger(RetryHandlingMetaMasterProxyClient.class); + private MetaMasterProxyServiceGrpc.MetaMasterProxyServiceBlockingStub mClient = null; + private final Address mProxyAddress; + private final long mStartTimeMs; + + /** + * Creates a new meta master client. + * + * @param proxyAddress address of the proxy + * @param conf master client configuration + * @param startTimeMs start timestamp + */ + public RetryHandlingMetaMasterProxyClient( + Address proxyAddress, MasterClientContext conf, long startTimeMs) { + super(conf); + mProxyAddress = proxyAddress; + mStartTimeMs = startTimeMs; + } + + @Override + protected ServiceType getRemoteServiceType() { + return ServiceType.META_MASTER_PROXY_SERVICE; + } + + @Override + protected String getServiceName() { + return Constants.META_MASTER_PROXY_SERVICE_NAME; + } + + @Override + protected long getServiceVersion() { + return Constants.META_MASTER_PROXY_SERVICE_VERSION; + } + + @Override + protected void afterConnect() { + mClient = MetaMasterProxyServiceGrpc.newBlockingStub(mChannel); + } + + /** + * Sends a heartbeat to the primary master. 
+ */ + public void proxyHeartbeat() throws IOException { + BuildVersion version = BuildVersion.newBuilder().setVersion(RuntimeConstants.VERSION) + .setRevision(RuntimeConstants.REVISION_SHORT).build(); + ProxyHeartbeatPOptions options = ProxyHeartbeatPOptions.newBuilder() + .setProxyAddress(mProxyAddress.toProto()) + .setStartTime(mStartTimeMs) + .setVersion(version).build(); + retryRPC(() -> mClient.withDeadlineAfter( + mContext.getClusterConf().getMs( + PropertyKey.USER_RPC_RETRY_MAX_DURATION), TimeUnit.MILLISECONDS) + .proxyHeartbeat(ProxyHeartbeatPRequest.newBuilder().setOptions(options).build()), + RPC_LOG, "ProxyHeartbeat", "options=%s", options); + } +} + diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/CompleteMultipartUploadHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/CompleteMultipartUploadHandler.java index 2a849413c477..acbc75a6fdeb 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/CompleteMultipartUploadHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/CompleteMultipartUploadHandler.java @@ -20,6 +20,7 @@ import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.AlluxioException; +import alluxio.exception.status.InvalidArgumentException; import alluxio.grpc.Bits; import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.DeletePOptions; @@ -33,6 +34,7 @@ import alluxio.web.ProxyWebServer; import com.codahale.metrics.Timer; +import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.dataformat.xml.XmlMapper; import com.google.common.base.Stopwatch; @@ -192,7 +194,8 @@ public void handle(String s, Request request, HttpServletRequest httpServletRequ throw e; } finally { if (stopwatch != null) { - ProxyWebServer.logAccess(httpServletRequest, httpServletResponse, stopwatch); + ProxyWebServer.logAccess(httpServletRequest, httpServletResponse, + stopwatch, S3BaseTask.OpType.CompleteMultipartUpload); } } } @@ -375,6 +378,11 @@ public CompleteMultipartUploadRequest parseCompleteMultipartUploadRequest(String if (cause instanceof S3Exception) { throw S3RestUtils.toObjectS3Exception((S3Exception) cause, objectPath); } + if (e instanceof JsonParseException) { + throw new S3Exception( + new InvalidArgumentException("Failed parsing CompleteMultipartUploadRequest."), + objectPath, S3ErrorCode.INVALID_ARGUMENT); + } throw S3RestUtils.toObjectS3Exception(e, objectPath); } return request; diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/RangeFileInStream.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/RangeFileInStream.java index 1df6b1f64eef..ffb528895648 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/RangeFileInStream.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/RangeFileInStream.java @@ -15,6 +15,7 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; /** * This class is use {@link FileInStream} underlying, and implement range read. @@ -65,6 +66,19 @@ public int read(byte[] b, int off, int len) throws IOException { return n; } + /** + * Reads up to len bytes of data from the input stream into the byte buffer. 
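+ * <p>Illustrative use (a sketch; the buffer size is arbitrary and {@code in} is a placeholder + * for an instance of this stream): + * <pre>{@code + * ByteBuffer buf = ByteBuffer.allocate(8192); + * int n = in.read(buf, 0, buf.remaining()); // returns -1 once the stream is exhausted + * }</pre>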
+ * @param byteBuffer the buffer into which the data is read + * @param off the start offset in the buffer at which the data is written + * @param len the maximum number of bytes to read + * @return the total number of bytes read into the buffer, or -1 if there is no more + * data because the end of the stream has been reached + * @throws IOException if the underlying stream cannot be read + */ + public int read(ByteBuffer byteBuffer, int off, int len) throws IOException { + return this.mUnderlyingStream.read(byteBuffer, off, len); + } + @Override + public void close() throws IOException { mUnderlyingStream.close(); diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/RateLimitInputStream.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/RateLimitInputStream.java new file mode 100644 index 000000000000..7fa6f770e447 --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/RateLimitInputStream.java @@ -0,0 +1,68 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy.s3; + +import com.google.common.util.concurrent.RateLimiter; + +import java.io.IOException; +import java.io.InputStream; + +/** + * This class is a wrapper for an InputStream which limits the rate at which bytes are read. + */ +public class RateLimitInputStream extends InputStream { + + private final InputStream mInputStream; + private final RateLimiter[] mRateLimiters; + + /** + * Constructs a new {@link RateLimitInputStream}. + * + * @param inputStream Original stream to be limited + * @param rateLimiters RateLimiters that cap the maximum number of bytes read per second + */ + public RateLimitInputStream(InputStream inputStream, RateLimiter... rateLimiters) { + mInputStream = inputStream; + mRateLimiters = rateLimiters; + } + + @Override + public int read() throws IOException { + acquire(1); + return mInputStream.read(); + } + + @Override + public int read(byte[] b) throws IOException { + return read(b, 0, b.length); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + acquire(Math.min(b.length - off, len)); + return mInputStream.read(b, off, len); + } + + @Override + public void close() throws IOException { + mInputStream.close(); + } + + private void acquire(int permits) { + for (RateLimiter rateLimiter : mRateLimiters) { + if (rateLimiter == null) { + continue; + } + rateLimiter.acquire(permits); + } + } +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BaseTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BaseTask.java new file mode 100644 index 000000000000..9a8a9004ffe3 --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BaseTask.java @@ -0,0 +1,108 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License.
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy.s3; + +import javax.ws.rs.core.Response; + +/** + * Abstract base task for handling S3 API logic. + */ +public abstract class S3BaseTask { + + protected S3Handler mHandler; + protected OpType mOPType; + + /** + * Instantiates an S3BaseTask. + * + * @param handler S3Handler object + * @param opType the enum indicating the S3 API name + */ + public S3BaseTask(S3Handler handler, OpType opType) { + mHandler = handler; + mOPType = opType; + } + + /** + * Returns the OpType (S3 API enum). + * + * @return OpType (S3 API enum) + */ + public OpType getOPType() { + return mOPType; + } + + /** + * Runs the core S3 API logic of the concrete S3 task. + * + * @return Response object containing common HTTP response properties + */ + public abstract Response continueTask(); + + /** + * Runs the S3 API logic in a customized asynchronous way, e.g. delegates the + * core API logic to another thread and does other work while waiting. + */ + public void handleTaskAsync() { + } + + /** + * Enum for tagging the HTTP request so it can be dispatched to + * different thread pools for handling. + */ + public enum OpTag { + LIGHT, HEAVY + } + + /** + * Enum indicating the name of the S3 API handled for each HTTP request. + */ + public enum OpType { + + // Object Task + ListParts(OpTag.LIGHT), + GetObjectTagging(OpTag.LIGHT), + PutObjectTagging(OpTag.LIGHT), + DeleteObjectTagging(OpTag.LIGHT), + GetObject(OpTag.HEAVY), PutObject(OpTag.HEAVY), + CopyObject(OpTag.HEAVY), DeleteObject(OpTag.LIGHT), + HeadObject(OpTag.LIGHT), UploadPart(OpTag.LIGHT), + UploadPartCopy(OpTag.HEAVY), + CreateMultipartUpload(OpTag.LIGHT), + AbortMultipartUpload(OpTag.LIGHT), + CompleteMultipartUpload(OpTag.HEAVY), + + // Bucket Task + ListBuckets(OpTag.LIGHT), + ListMultipartUploads(OpTag.LIGHT), + GetBucketTagging(OpTag.LIGHT), + PutBucketTagging(OpTag.LIGHT), + DeleteBucketTagging(OpTag.LIGHT), + CreateBucket(OpTag.LIGHT), + ListObjects(OpTag.LIGHT), // as well as ListObjectsV2 + DeleteObjects(OpTag.LIGHT), + HeadBucket(OpTag.LIGHT), + DeleteBucket(OpTag.LIGHT), + Unsupported(OpTag.LIGHT), + Unknown(OpTag.LIGHT); + + private final OpTag mOpTag; + + OpType(OpTag opTag) { + mOpTag = opTag; + } + + OpTag getOpTag() { + return mOpTag; + } + } +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BucketTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BucketTask.java new file mode 100644 index 000000000000..17d3b5c7eade --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BucketTask.java @@ -0,0 +1,596 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */ + +package alluxio.proxy.s3; + +import alluxio.AlluxioURI; +import alluxio.Constants; +import alluxio.client.file.FileSystem; +import alluxio.client.file.URIStatus; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.AccessControlException; +import alluxio.exception.AlluxioException; +import alluxio.exception.DirectoryNotEmptyException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.grpc.Bits; +import alluxio.grpc.CreateDirectoryPOptions; +import alluxio.grpc.DeletePOptions; +import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.PMode; +import alluxio.grpc.SetAttributePOptions; +import alluxio.proto.journal.File; + +import com.fasterxml.jackson.dataformat.xml.XmlMapper; +import com.google.common.base.Preconditions; +import com.google.common.net.InetAddresses; +import com.google.protobuf.ByteString; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.stream.Collectors; +import javax.ws.rs.core.Response; + +/** + * S3 Tasks to handle bucket level or global level request. + * (only bucket name or no bucket name is provided) + */ +public class S3BucketTask extends S3BaseTask { + private static final Logger LOG = LoggerFactory.getLogger(S3BucketTask.class); + + protected S3BucketTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + throw new S3Exception(S3ErrorCode.NOT_IMPLEMENTED); + }); + } + + /** + * Factory for getting a S3BucketTask type task. + */ + public static final class Factory { + /** + * Marshall the request and create corresponding bucket level S3 task. 
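+ * <p>For example (hypothetical dispatch matching the switch below), a GET with no bucket name + * resolves to {@code ListBucketsTask} and a PUT with {@code ?tagging} resolves to + * {@code PutBucketTaggingTask}: + * <pre>{@code + * S3BucketTask task = S3BucketTask.Factory.create(handler); + * Response response = task.continueTask(); + * }</pre>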
+ * @param handler + * @return S3BucketTask + */ + public static S3BucketTask create(S3Handler handler) { + switch (handler.getHTTPVerb()) { + case "GET": + if (StringUtils.isEmpty(handler.getBucket())) { + return new ListBucketsTask(handler, OpType.ListBuckets); + } else if (handler.getQueryParameter("tagging") != null) { + return new GetBucketTaggingTask(handler, OpType.GetBucketTagging); + } else if (handler.getQueryParameter("uploads") != null) { + return new ListMultipartUploadsTask(handler, OpType.ListMultipartUploads); + } else { + return new ListObjectsTask(handler, OpType.ListObjects); + } + case "PUT": + if (handler.getQueryParameter("tagging") != null) { + return new PutBucketTaggingTask(handler, OpType.PutBucketTagging); + } else { + return new CreateBucketTask(handler, OpType.CreateBucket); + } + case "POST": + if (handler.getQueryParameter("delete") != null) { + return new DeleteObjectsTask(handler, OpType.DeleteObjects); + } + break; + case "HEAD": + if (!StringUtils.isEmpty(handler.getBucket())) { + return new HeadBucketTask(handler, OpType.HeadBucket); + } + break; + case "DELETE": + if (handler.getQueryParameter("tagging") != null) { + return new DeleteBucketTaggingTask(handler, OpType.DeleteBucketTagging); + } else { + return new DeleteBucketTask(handler, OpType.DeleteBucket); + } + default: + break; + } + return new S3BucketTask(handler, OpType.Unsupported); + } + } + + private static class ListBucketsTask extends S3BucketTask { + protected ListBucketsTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(S3Constants.EMPTY, () -> { + final String user = mHandler.getUser(); + + List objects = new ArrayList<>(); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, null, null)) { + try { + objects = mHandler.getMetaFS().listStatus(new AlluxioURI("/")); + } catch (AlluxioException | IOException e) { + if (e instanceof AccessControlException) { + auditContext.setAllowed(false); + } + auditContext.setSucceeded(false); + throw S3RestUtils.toBucketS3Exception(e, "/"); + } + + final List buckets = objects.stream() + .filter((uri) -> uri.getOwner().equals(user)) + // debatable (?) potentially breaks backcompat(?) + .filter(URIStatus::isFolder) + .collect(Collectors.toList()); + buckets.forEach( + (uri) -> mHandler.BUCKET_PATH_CACHE.put(uri.getPath(), true)); + return new ListAllMyBucketsResult(buckets); + } + }); + } + } // end of ListBucketsTask + + private static class GetBucketTaggingTask extends S3BucketTask { + protected GetBucketTaggingTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + + String path = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser( + mHandler.getUser(), mHandler.getMetaFS()); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext, + mHandler.BUCKET_PATH_CACHE); + AlluxioURI uri = new AlluxioURI(path); + try { + TaggingData tagData = S3RestUtils.deserializeTags(userFs.getStatus(uri).getXAttr()); + LOG.debug("GetBucketTagging tagData={}", tagData); + return tagData != null ? 
tagData : new TaggingData(); + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, mHandler.getBucket(), auditContext); + } + } + }); + } + } // end of GetBucketTaggingTask + + private static class ListMultipartUploadsTask extends S3BucketTask { + + protected ListMultipartUploadsTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + final String bucket = mHandler.getBucket(); + Preconditions.checkNotNull(bucket, "required 'bucket' parameter is missing"); + + String path = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext, + mHandler.BUCKET_PATH_CACHE); + try { + List<URIStatus> children = mHandler.getMetaFS().listStatus(new AlluxioURI( + S3RestUtils.MULTIPART_UPLOADS_METADATA_DIR)); + final List<URIStatus> uploadIds = children.stream() + .filter((uri) -> uri.getOwner().equals(user)) + .collect(Collectors.toList()); + return ListMultipartUploadsResult.buildFromStatuses(bucket, uploadIds); + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, bucket, auditContext); + } + } + }); + } + } // end of ListMultipartUploadsTask + + private static class ListObjectsTask extends S3BucketTask { + protected ListObjectsTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + private String normalizeS3Prefix(String prefix, char delimiter) { + if (prefix != null) { + int pos = prefix.lastIndexOf(delimiter); + if (pos >= 0) { + return prefix.substring(0, pos + 1); + } + } + return S3Constants.EMPTY; + } + + private String parsePathWithDelimiter(String bucketPath, String prefix, String delimiter) + throws S3Exception { + // TODO(czhu): allow non-"/" delimiters + // Alluxio only supports / as the delimiter + if (!delimiter.equals(AlluxioURI.SEPARATOR)) { + throw new S3Exception(bucketPath, new S3ErrorCode( + S3ErrorCode.PRECONDITION_FAILED.getCode(), + "Alluxio S3 API only supports / as the delimiter.", + S3ErrorCode.PRECONDITION_FAILED.getStatus())); + } + char delim = AlluxioURI.SEPARATOR.charAt(0); + String normalizedBucket = + bucketPath.replace(S3Constants.BUCKET_SEPARATOR, AlluxioURI.SEPARATOR); + String normalizedPrefix = normalizeS3Prefix(prefix, delim); + + if (!normalizedPrefix.isEmpty() && !normalizedPrefix.startsWith(AlluxioURI.SEPARATOR)) { + normalizedPrefix = AlluxioURI.SEPARATOR + normalizedPrefix; + } + return normalizedBucket + normalizedPrefix; + } + + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + String path = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext, + mHandler.BUCKET_PATH_CACHE); + String markerParam = mHandler.getQueryParameter("marker"); + String maxKeysParam = mHandler.getQueryParameter("max-keys"); + String prefixParam = mHandler.getQueryParameter("prefix"); + String delimiterParam = mHandler.getQueryParameter("delimiter"); + String
encodingTypeParam = mHandler.getQueryParameter("encoding-type"); + String listTypeParam = mHandler.getQueryParameter("list-type"); + String continuationTokenParam = mHandler.getQueryParameter("continuation-token"); + String startAfterParam = mHandler.getQueryParameter("start-after"); + + int maxKeys = maxKeysParam == null + ? ListBucketOptions.DEFAULT_MAX_KEYS : Integer.parseInt(maxKeysParam); + Integer listType = listTypeParam == null ? null : Integer.parseInt(listTypeParam); + ListBucketOptions listBucketOptions = ListBucketOptions.defaults() + .setMarker(markerParam) + .setPrefix(prefixParam) + .setMaxKeys(maxKeys) + .setDelimiter(delimiterParam) + .setEncodingType(encodingTypeParam) + .setListType(listType) + .setContinuationToken(continuationTokenParam) + .setStartAfter(startAfterParam); + + List children; + try { + // TODO(czhu): allow non-"/" delimiters by parsing the prefix & delimiter pair to + // determine what directory to list the contents of + // only list the direct children if delimiter is not null + if (StringUtils.isNotEmpty(delimiterParam)) { + if (prefixParam == null) { + path = parsePathWithDelimiter(path, S3Constants.EMPTY, delimiterParam); + } else { + path = parsePathWithDelimiter(path, prefixParam, delimiterParam); + } + children = userFs.listStatus(new AlluxioURI(path)); + } else { + if (prefixParam != null) { + path = parsePathWithDelimiter(path, prefixParam, AlluxioURI.SEPARATOR); + } + ListStatusPOptions options = ListStatusPOptions.newBuilder() + .setRecursive(true).build(); + children = userFs.listStatus(new AlluxioURI(path), options); + } + } catch (FileDoesNotExistException e) { + // Since we've called S3RestUtils.checkPathIsAlluxioDirectory() on the bucket path + // already, this indicates that the prefix was unable to be found in the Alluxio FS + children = new ArrayList<>(); + } catch (IOException | AlluxioException e) { + auditContext.setSucceeded(false); + throw S3RestUtils.toBucketS3Exception(e, mHandler.getBucket()); + } + return new ListBucketResult( + mHandler.getBucket(), + children, + listBucketOptions); + } // end try-with-resources block + }); + } + } // end of ListObjectsTask + + private static class PutBucketTaggingTask extends S3BucketTask { + + protected PutBucketTaggingTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), mHandler.getUser(), mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(mHandler.getMetaFS(), bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); + try { + TaggingData tagData = new XmlMapper().readerFor(TaggingData.class) + .readValue(mHandler.getInputStream()); + LOG.debug("PutBucketTagging tagData={}", tagData); + Map xattrMap = new HashMap<>(); + xattrMap.put(S3Constants.TAGGING_XATTR_KEY, TaggingData.serialize(tagData)); + SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() + .putAllXattr(xattrMap) + .setXattrUpdateStrategy(File.XAttrUpdateStrategy.UNION_REPLACE) + .build(); + userFs.setAttribute(new AlluxioURI(bucketPath), attrPOptions); + } catch (IOException e) { + if (e.getCause() instanceof S3Exception) { + throw 
S3RestUtils.toBucketS3Exception((S3Exception) e.getCause(), bucketPath, + auditContext); + } + auditContext.setSucceeded(false); + throw new S3Exception(e, bucketPath, S3ErrorCode.MALFORMED_XML); + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); + } + return Response.Status.OK; + } + }); + } + } // end of PutBucketTaggingTask + + private static class CreateBucketTask extends S3BucketTask { + protected CreateBucketTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + if (S3Handler.BUCKET_NAMING_RESTRICTION_ENABLED) { + Matcher m = S3Handler.BUCKET_ADJACENT_DOTS_DASHES_PATTERN.matcher(mHandler.getBucket()); + while (m.find()) { + if (!m.group().equals("--")) { + auditContext.setSucceeded(false); + throw new S3Exception(mHandler.getBucket(), S3ErrorCode.INVALID_BUCKET_NAME); + } + } + if (!S3Handler.BUCKET_VALID_NAME_PATTERN.matcher(mHandler.getBucket()).matches() + || S3Handler.BUCKET_INVALIDATION_PREFIX_PATTERN.matcher(mHandler.getBucket()) + .matches() + || S3Handler.BUCKET_INVALID_SUFFIX_PATTERN.matcher(mHandler.getBucket()).matches() + || InetAddresses.isInetAddress(mHandler.getBucket())) { + auditContext.setSucceeded(false); + throw new S3Exception(mHandler.getBucket(), S3ErrorCode.INVALID_BUCKET_NAME); + } + } + try { + URIStatus status = mHandler.getMetaFS().getStatus(new AlluxioURI(bucketPath)); + if (status.isFolder()) { + if (status.getOwner().equals(user)) { + // Silently swallow CreateBucket calls on existing buckets for this user + // - S3 clients may prepend PutObject requests with CreateBucket calls instead of + // calling HeadBucket to ensure that the bucket exists + mHandler.BUCKET_PATH_CACHE.put(bucketPath, true); + return Response.Status.OK; + } + // Otherwise, this bucket is owned by a different user + throw new S3Exception(S3ErrorCode.BUCKET_ALREADY_EXISTS); + } + // Otherwise, that path exists in Alluxio but is not a directory + auditContext.setSucceeded(false); + throw new InvalidPathException("A file already exists at bucket path " + bucketPath); + } catch (FileDoesNotExistException e) { + // do nothing, we will create the directory below + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); + } + + // These permission bits will be inherited by all objects/folders created within + // the bucket; we don't support custom bucket/object ACLs at the moment + CreateDirectoryPOptions options = + CreateDirectoryPOptions.newBuilder() + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE)) + .setWriteType(S3RestUtils.getS3WriteType()) + .build(); + try { + mHandler.getMetaFS().createDirectory(new AlluxioURI(bucketPath), options); + SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() + .setOwner(user) + .build(); + mHandler.getMetaFS().setAttribute(new AlluxioURI(bucketPath), attrPOptions); + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); + } + mHandler.BUCKET_PATH_CACHE.put(bucketPath, true); + return 
Response.Status.OK; + } + }); + } + } // end of CreateBucketTask + + private static class DeleteObjectsTask extends S3BucketTask { + + protected DeleteObjectsTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + try { + DeleteObjectsRequest request = new XmlMapper().readerFor(DeleteObjectsRequest.class) + .readValue(mHandler.getInputStream()); + List objs = request.getToDelete(); + List success = new ArrayList<>(); + List errored = new ArrayList<>(); + objs.sort(Comparator.comparingInt(x -> -1 * x.getKey().length())); + objs.forEach(obj -> { + try { + AlluxioURI uri = new AlluxioURI(bucketPath + + AlluxioURI.SEPARATOR + obj.getKey()); + DeletePOptions options = DeletePOptions.newBuilder().build(); + userFs.delete(uri, options); + DeleteObjectsResult.DeletedObject del = new DeleteObjectsResult.DeletedObject(); + del.setKey(obj.getKey()); + success.add(del); + } catch (FileDoesNotExistException | DirectoryNotEmptyException e) { + /* + FDNE - delete on FDNE should be counted as a success, as there's nothing to do + DNE - s3 has no concept dirs - if it _is_ a dir, nothing to delete. + */ + DeleteObjectsResult.DeletedObject del = new DeleteObjectsResult.DeletedObject(); + del.setKey(obj.getKey()); + success.add(del); + } catch (IOException | AlluxioException e) { + DeleteObjectsResult.ErrorObject err = new DeleteObjectsResult.ErrorObject(); + err.setKey(obj.getKey()); + err.setMessage(e.getMessage()); + errored.add(err); + } + }); + + DeleteObjectsResult result = new DeleteObjectsResult(); + if (!request.getQuiet()) { + result.setDeleted(success); + } + result.setErrored(errored); + return result; + } catch (IOException e) { + LOG.debug("Failed to parse DeleteObjects request:", e); + auditContext.setSucceeded(false); + return Response.Status.BAD_REQUEST; + } + } + }); + } + } // end of DeleteObjectsTask + + private static class HeadBucketTask extends S3BucketTask { + protected HeadBucketTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); + } + return Response.ok().build(); + }); + } + } // end of HeadBucketTask + + private static class DeleteBucketTaggingTask extends S3BucketTask { + + protected DeleteBucketTaggingTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = 
S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); + + LOG.debug("DeleteBucketTagging bucket={}", bucketPath); + Map xattrMap = new HashMap<>(); + xattrMap.put(S3Constants.TAGGING_XATTR_KEY, ByteString.copyFrom(new byte[0])); + SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() + .putAllXattr(xattrMap) + .setXattrUpdateStrategy(File.XAttrUpdateStrategy.DELETE_KEYS) + .build(); + try { + userFs.setAttribute(new AlluxioURI(bucketPath), attrPOptions); + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); + } + return Response.Status.NO_CONTENT; + } + }); + } + } // end of DeleteBucketTaggingTask + + private static class DeleteBucketTask extends S3BucketTask { + + protected DeleteBucketTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); + // Delete the bucket. + DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly(Configuration + .get(PropertyKey.PROXY_S3_DELETE_TYPE) + .equals(Constants.S3_DELETE_IN_ALLUXIO_ONLY)) + .build(); + try { + userFs.delete(new AlluxioURI(bucketPath), options); + mHandler.BUCKET_PATH_CACHE.put(bucketPath, false); + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); + } + return Response.Status.NO_CONTENT; + } + }); + } + } // end of DeleteBucketTask +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Constants.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Constants.java index 1dda55744f77..b94cffb6f289 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Constants.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Constants.java @@ -28,6 +28,7 @@ public final class S3Constants { * mount:point:bucket represents Alluxio directory /mount/point/bucket. */ public static final String BUCKET_SEPARATOR = ":"; + public static final String EMPTY = ""; /* Headers */ // standard headers @@ -40,6 +41,7 @@ public final class S3Constants { // AWS headers public static final String S3_ACL_HEADER = "x-amz-acl"; public static final String S3_COPY_SOURCE_HEADER = "x-amz-copy-source"; + public static final String S3_COPY_SOURCE_RANGE = "x-amz-copy-source-range"; public static final String S3_ETAG_HEADER = "ETAG"; public static final String S3_METADATA_DIRECTIVE_HEADER = "x-amz-metadata-directive"; diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java new file mode 100644 index 000000000000..7479d91f9e67 --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java @@ -0,0 +1,530 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy.s3; + +import alluxio.AlluxioURI; +import alluxio.client.file.FileSystem; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.Bits; +import alluxio.grpc.CreateDirectoryPOptions; +import alluxio.grpc.PMode; +import alluxio.grpc.XAttrPropagationStrategy; +import alluxio.master.audit.AsyncUserAccessAuditLogWriter; +import alluxio.util.CommonUtils; +import alluxio.util.ThreadUtils; +import alluxio.web.ProxyWebServer; + +import com.google.common.base.Stopwatch; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import org.eclipse.jetty.server.Request; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URLDecoder; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import javax.annotation.Nullable; +import javax.servlet.ServletContext; +import javax.servlet.ServletInputStream; +import javax.servlet.ServletOutputStream; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.core.Response; + +/** + * Handler for an S3 API request in the Alluxio proxy: it parses the incoming request and + * creates the corresponding S3 task to run its logic. + */ +public class S3Handler { + public static final boolean BUCKET_NAMING_RESTRICTION_ENABLED = + Configuration.getBoolean(PropertyKey.PROXY_S3_BUCKET_NAMING_RESTRICTIONS_ENABLED); + public static final int MAX_HEADER_METADATA_SIZE = + (int) Configuration.getBytes(PropertyKey.PROXY_S3_METADATA_HEADER_MAX_SIZE); + public static final boolean MULTIPART_CLEANER_ENABLED = + Configuration.getBoolean(PropertyKey.PROXY_S3_MULTIPART_UPLOAD_CLEANER_ENABLED); + // https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html + // - Undocumented edge-case, no adjacent periods with hyphens, i.e.: '.-' or '-.' + public static final Pattern BUCKET_ADJACENT_DOTS_DASHES_PATTERN = Pattern.compile("([-\\.]{2})"); + public static final Pattern BUCKET_INVALIDATION_PREFIX_PATTERN = Pattern.compile("^xn--.*"); + public static final Pattern BUCKET_INVALID_SUFFIX_PATTERN = Pattern.compile(".*-s3alias$"); + public static final Pattern BUCKET_VALID_NAME_PATTERN = + Pattern.compile("[a-z0-9][a-z0-9\\.-]{1,61}[a-z0-9]"); + public static final Pattern BUCKET_PATH_PATTERN = + Pattern.compile("^" + S3RequestServlet.S3_V2_SERVICE_PATH_PREFIX + "/[^/]*$"); + public static final Pattern OBJECT_PATH_PATTERN = + Pattern.compile("^" + S3RequestServlet.S3_V2_SERVICE_PATH_PREFIX + "/[^/]*/.*$"); + public static final int BUCKET_PATH_CACHE_SIZE = 65536; + /* BUCKET_PATH_CACHE caches bucket paths for a limited period. + BUCKET_PATH_CACHE.put(bucketPath,true) means the bucket path exists. + BUCKET_PATH_CACHE.put(bucketPath,false) has the same effect + as BUCKET_PATH_CACHE.remove(bucketPath).
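+ Example (illustrative): + BUCKET_PATH_CACHE.put("/s3-bucket", true);    // mark the bucket path as existing + BUCKET_PATH_CACHE.getIfPresent("/s3-bucket"); // true until the entry expires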
*/ + public static final Cache<String, Boolean> BUCKET_PATH_CACHE = CacheBuilder.newBuilder() + .maximumSize(BUCKET_PATH_CACHE_SIZE) + .expireAfterWrite( + Configuration.global().getMs(PropertyKey.PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS), + TimeUnit.MILLISECONDS) + .build(); + private static final Logger LOG = LoggerFactory.getLogger(S3Handler.class); + private static final ThreadLocal<byte[]> TLS_BYTES = + ThreadLocal.withInitial(() -> new byte[8 * 1024]); + private final String mBucket; + private final String mObject; + private final HttpServletRequest mServletRequest; + private final HttpServletResponse mServletResponse; + private final ServletContext mServletContext; + public AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter; + String[] mUnsupportedSubResources = {"acl", "policy", "versioning", "cors", + "encryption", "intelligent-tiering", "inventory", "lifecycle", + "metrics", "ownershipControls", "replication", "website", "accelerate", + "location", "logging", "metrics", "notification", "ownershipControls", + "policyStatus", "requestPayment", "attributes", "legal-hold", "object-lock", + "retention", "torrent", "publicAccessBlock", "restore", "select"}; + Set<String> mUnsupportedSubResourcesSet = new HashSet<>(Arrays.asList(mUnsupportedSubResources)); + Map<String, String> mAmzHeaderMap = new HashMap<>(); + Request mBaseRequest; + private Stopwatch mStopwatch; + private String mUser; + private S3BaseTask mS3Task; + private FileSystem mMetaFS; + + /** + * S3Handler constructor. + * @param bucket + * @param object + * @param request + * @param response + */ + public S3Handler(String bucket, String object, + HttpServletRequest request, HttpServletResponse response) { + mBucket = bucket; + mObject = object; + mServletRequest = request; + mServletResponse = response; + mServletContext = request.getServletContext(); + } + + /** + * Creates an S3Handler based on the incoming request. + * @param path + * @param request + * @param response + * @return an S3Handler + * @throws Exception + * + */ + public static S3Handler createHandler(String path, + HttpServletRequest request, + HttpServletResponse response) throws Exception { + Stopwatch stopwatch = Stopwatch.createStarted(); + Matcher bucketMatcher = BUCKET_PATH_PATTERN.matcher(path); + Matcher objectMatcher = OBJECT_PATH_PATTERN.matcher(path); + + String pathStr = path; + String bucket = null; + String object = null; + S3Handler handler = null; + try { + if (bucketMatcher.matches()) { + pathStr = path.substring(S3RequestServlet.S3_V2_SERVICE_PATH_PREFIX.length() + 1); + bucket = URLDecoder.decode(pathStr, "UTF-8"); + } else if (objectMatcher.matches()) { + pathStr = path.substring(S3RequestServlet.S3_V2_SERVICE_PATH_PREFIX.length() + 1); + bucket = URLDecoder.decode( + pathStr.substring(0, pathStr.indexOf(AlluxioURI.SEPARATOR)), "UTF-8"); + object = URLDecoder.decode( + pathStr.substring(pathStr.indexOf(AlluxioURI.SEPARATOR) + 1), "UTF-8"); + } + handler = new S3Handler(bucket, object, request, response); + handler.setStopwatch(stopwatch); + handler.init(); + S3BaseTask task = null; + if (object != null && !object.isEmpty()) { + task = S3ObjectTask.Factory.create(handler); + } else { + task = S3BucketTask.Factory.create(handler); + } + handler.setS3Task(task); + return handler; + } catch (Exception ex) { + LOG.error("Exception while creating S3Handler: {}", ThreadUtils.formatStackTrace(ex)); + throw ex; + } + } + + /** + * Processes the response returned from the S3 task's core logic and writes it to the downstream client.
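+ * <p>Typical call site (a sketch; the response normally comes from + * {@code S3BaseTask#continueTask}): + * <pre>{@code + * Response response = handler.getS3Task().continueTask(); + * S3Handler.processResponse(handler.getServletResponse(), response); + * }</pre>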
+ * @param servletResponse + * @param response + * @throws IOException + */ + public static void processResponse(HttpServletResponse servletResponse, + Response response) throws IOException { + try { + // Status + servletResponse.setStatus(response.getStatus()); + // Headers + final MultivaluedMap headers = response.getStringHeaders(); + for (final Map.Entry> e : headers.entrySet()) { + final Iterator it = e.getValue().iterator(); + if (!it.hasNext()) { + continue; + } + final String header = e.getKey(); + if (servletResponse.containsHeader(header)) { + // replace any headers previously set with values from Jersey container response. + servletResponse.setHeader(header, it.next()); + } + while (it.hasNext()) { + servletResponse.addHeader(header, it.next()); + } + } + // Entity + if (response.hasEntity()) { + ServletOutputStream servletOut = servletResponse.getOutputStream(); + Object entity = response.getEntity(); + if (entity instanceof InputStream) { + InputStream is = (InputStream) entity; + byte[] bytesArray = TLS_BYTES.get(); + int read; + do { + try { + read = is.read(bytesArray); + } catch (IOException ex) { + /* Alluxio thrown IOException, remapping the exception + and send new response to downstream again */ + Response errorResponse = S3ErrorResponse.createErrorResponse(ex, ""); + S3Handler.processResponse(servletResponse, errorResponse); + return; + } + if (read == -1) { + break; + } + servletOut.write(bytesArray, 0, read); + } while (true); + } else { + String contentStr = entity.toString(); + int contentLen = contentStr.length(); + servletResponse.setContentLength(contentLen); + servletOut.write(contentStr.getBytes()); + } + } + } finally { + response.close(); + } + } + + /** + * Initialize the S3Handler object in preparation for handling the request. + * @throws Exception + */ + public void init() throws Exception { + // Do Authentication of the request. + doAuthentication(); + // Extract x-amz- headers. + extractAMZHeaders(); + // Reject unsupported subresources. + rejectUnsupportedResources(); + // Init utils + ServletContext context = getServletContext(); + mMetaFS = (FileSystem) context.getAttribute(ProxyWebServer.FILE_SYSTEM_SERVLET_RESOURCE_KEY); + mAsyncAuditLogWriter = (AsyncUserAccessAuditLogWriter) context.getAttribute( + ProxyWebServer.ALLUXIO_PROXY_AUDIT_LOG_WRITER_KEY); + // Initiate the S3 API metadata directories + if (!mMetaFS.exists(new AlluxioURI(S3RestUtils.MULTIPART_UPLOADS_METADATA_DIR))) { + mMetaFS.createDirectory(new AlluxioURI(S3RestUtils.MULTIPART_UPLOADS_METADATA_DIR), + CreateDirectoryPOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL).setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE) + .build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .build()); + } + } + + /** + * get S3Task of this S3Handler. + * @return S3BaseTask + */ + public S3BaseTask getS3Task() { + return mS3Task; + } + + /** + * set S3Task for this S3Handler. + * @param task + */ + public void setS3Task(S3BaseTask task) { + mS3Task = task; + } + + /** + * get HTTP verb of this request. + * @return HTTP Verb + */ + public String getHTTPVerb() { + return mServletRequest.getMethod(); + } + + /** + * get specified HTTP header value of this request. + * @param headerName + * @return header value + */ + public String getHeader(String headerName) { + return mServletRequest.getHeader(headerName); + } + + /** + * get specified HTTP header with a default if not exist. 
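+ * <p>Example (mirrors how the Range header is read elsewhere in this patch): + * <pre>{@code + * String range = handler.getHeaderOrDefault("Range", null); // null when the header is absent + * }</pre>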
+ * @param headerName + * @param defaultHeaderValue + * @return header value + */ + public String getHeaderOrDefault(String headerName, String defaultHeaderValue) { + String headerVal = mServletRequest.getHeader(headerName); + if (headerVal == null) { + headerVal = defaultHeaderValue; + } + return headerVal; + } + + /** + * get HttpServletResponse of this request. + * @return HttpServletResponse + */ + public HttpServletResponse getServletResponse() { + return mServletResponse; + } + + /** + * get HttpServletRequest of this request. + * @return HttpServletRequest + */ + public HttpServletRequest getServletRequest() { + return mServletRequest; + } + + /** + * get ServletContext from current http conversation. + * @return ServletContext + */ + public ServletContext getServletContext() { + return mServletContext; + } + + /** + * retrieve given query parameter value. + * @param queryParam + * @return query parameter value + */ + public String getQueryParameter(String queryParam) { + return mServletRequest.getParameter(queryParam); + } + + /** + * retrieve inputstream from incoming request. + * @return ServletInputStream + */ + public ServletInputStream getInputStream() throws IOException { + return mServletRequest.getInputStream(); + } + + /** + * Creates a {@link S3AuditContext} instance. + * + * @param command the command to be logged by this {@link S3AuditContext} + * @param user user name + * @param bucket bucket name + * @param object object name + * @return newly-created {@link S3AuditContext} instance + */ + public S3AuditContext createAuditContext(String command, + String user, + @Nullable String bucket, + @Nullable String object) { + // Audit log may be enabled during runtime + AsyncUserAccessAuditLogWriter auditLogWriter = null; + if (Configuration.getBoolean(PropertyKey.PROXY_AUDIT_LOGGING_ENABLED)) { + auditLogWriter = mAsyncAuditLogWriter; + } + S3AuditContext auditContext = new S3AuditContext(auditLogWriter); + if (auditLogWriter != null) { + String ugi = ""; + if (user != null) { + try { + String primaryGroup = CommonUtils.getPrimaryGroupName(user, Configuration.global()); + ugi = user + "," + primaryGroup; + } catch (IOException e) { + LOG.debug("Failed to get primary group for user {}.", user); + ugi = user + ",N/A"; + } + } else { + ugi = "N/A"; + } + auditContext.setUgi(ugi) + .setCommand(command) + .setIp(String.format("%s:%s", + mServletRequest.getRemoteAddr(), mServletRequest.getRemotePort())) + .setBucket(bucket) + .setObject(object) + .setAllowed(true) + .setSucceeded(true) + .setCreationTimeNs(System.nanoTime()); + } + return auditContext; + } + + /** + * Utility function to dump a collection into a string. + * @param prefix + * @param collection + * @return result string + */ + public String printCollection(String prefix, Collection collection) { + StringBuilder sb = new StringBuilder(prefix + ":["); + Iterator it = collection.iterator(); + while (it.hasNext()) { + sb.append(it.next().toString()); + if (it.hasNext()) { + sb.append(","); + } + } + sb.append("]"); + return sb.toString(); + } + + /** + * Utility function to dump a map into a string. 
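+ * <p>For instance (illustrative), a map {@code {a=1, b=2}} dumped with prefix {@code "hdrs"} + * yields the string {@code hdrs:[a:1,b:2]}.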
+ * @param prefix + * @param map + * @return result string + */ + public String printMap(String prefix, Map map) { + StringBuilder sb = new StringBuilder(prefix + ":["); + Iterator> it = map.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry entry = it.next(); + sb.append(entry.getKey().toString() + ":" + entry.getValue().toString()); + if (it.hasNext()) { + sb.append(","); + } + } + sb.append("]"); + return sb.toString(); + } + + /** + * Utility function to help extract x-amz- headers from request. + */ + public void extractAMZHeaders() { + java.util.Enumeration headerNamesIt = mServletRequest.getHeaderNames(); + while (headerNamesIt.hasMoreElements()) { + String header = headerNamesIt.nextElement(); + mAmzHeaderMap.putIfAbsent(header, mServletRequest.getHeader(header)); + } + } + + /** + * Reject unsupported request from the given subresources from request. + * @throws S3Exception + */ + public void rejectUnsupportedResources() throws S3Exception { + java.util.Enumeration parameterNamesIt = mServletRequest.getParameterNames(); + while (parameterNamesIt.hasMoreElements()) { + if (mUnsupportedSubResourcesSet.contains(parameterNamesIt.nextElement())) { + throw new S3Exception(S3Constants.EMPTY, S3ErrorCode.NOT_IMPLEMENTED); + } + } + } + + /** + * Do S3 request authentication. + * @throws Exception + */ + public void doAuthentication() throws Exception { + try { + String authorization = mServletRequest.getHeader("Authorization"); + String user = S3RestUtils.getUser(authorization, mServletRequest); + // replace the authorization header value to user + LOG.debug("request origin Authorization Header is: {}, new user header is: {}", + authorization, user); + mUser = user; + } catch (Exception e) { + LOG.warn("exception happened in Authentication."); + throw e; + } + } + + /** + * Get the user name of this request. + * @return user name + */ + public String getUser() { + return mUser; + } + + /** + * Get the bucket name of this request. + * @return bucket name + */ + public String getBucket() { + return mBucket; + } + + /** + * Get the object name of this request. + * @return object name + */ + public String getObject() { + return mObject; + } + + /** + * Get system user FileSystem object. + * @return FileSystem object + */ + public FileSystem getMetaFS() { + return mMetaFS; + } + + /** + * Get Stopwatch object used for recording this request's latency. + * @return Stopwatch object + */ + public Stopwatch getStopwatch() { + return mStopwatch; + } + + /** + * Set the Stopwatch object used for recording this request's latency. + * @param stopwatch + */ + public void setStopwatch(Stopwatch stopwatch) { + mStopwatch = stopwatch; + } +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java new file mode 100644 index 000000000000..6c00fc704bee --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java @@ -0,0 +1,1447 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.proxy.s3; + +import alluxio.AlluxioURI; +import alluxio.Constants; +import alluxio.client.WriteType; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileOutStream; +import alluxio.client.file.FileSystem; +import alluxio.client.file.URIStatus; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.AlluxioException; +import alluxio.exception.DirectoryNotEmptyException; +import alluxio.exception.FileAlreadyExistsException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.status.InvalidArgumentException; +import alluxio.grpc.Bits; +import alluxio.grpc.CreateDirectoryPOptions; +import alluxio.grpc.CreateFilePOptions; +import alluxio.grpc.DeletePOptions; +import alluxio.grpc.OpenFilePOptions; +import alluxio.grpc.PMode; +import alluxio.grpc.RenamePOptions; +import alluxio.grpc.S3SyntaxOptions; +import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.XAttrPropagationStrategy; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.proto.journal.File; +import alluxio.util.ThreadUtils; +import alluxio.web.ProxyWebServer; + +import com.codahale.metrics.Timer; +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.dataformat.xml.XmlMapper; +import com.google.common.base.Preconditions; +import com.google.common.io.BaseEncoding; +import com.google.common.io.ByteStreams; +import com.google.common.primitives.Longs; +import com.google.common.util.concurrent.RateLimiter; +import com.google.protobuf.ByteString; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; +import java.security.DigestOutputStream; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.stream.Collectors; +import javax.servlet.http.HttpServletResponse; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +/** + * S3 Tasks to handle object level request. + * (bucket and object name provided in the request) + */ +public class S3ObjectTask extends S3BaseTask { + private static final Logger LOG = LoggerFactory.getLogger(S3ObjectTask.class); + + protected S3ObjectTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + throw new S3Exception(S3ErrorCode.NOT_IMPLEMENTED); + }); + } + + /** + * Concatenate bucket and object to make a full path. + * @return full path + */ + public String getObjectTaskResource() { + return mHandler.getBucket() + AlluxioURI.SEPARATOR + mHandler.getObject(); + } + + /** + * Factory for getting a S3ObjectTask. + */ + public static final class Factory { + /** + * Marshall the request and create corresponding object level S3 task. 
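+ * <p>Hypothetical dispatch matching the switch below: a GET with {@code ?uploadId} resolves to + * {@code ListPartsTask}, and a PUT carrying the {@code x-amz-copy-source} header resolves to + * {@code CopyObjectTask}: + * <pre>{@code + * S3ObjectTask task = S3ObjectTask.Factory.create(handler); + * Response response = task.continueTask(); + * }</pre>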
+ * @param handler + * @return S3ObjectTask + */ + public static S3ObjectTask create(S3Handler handler) { + switch (handler.getHTTPVerb()) { + case "GET": + if (handler.getQueryParameter("uploadId") != null) { + return new ListPartsTask(handler, OpType.ListParts); + } else if (handler.getQueryParameter("tagging") != null) { + return new GetObjectTaggingTask(handler, OpType.GetObjectTagging); + } else { + return new GetObjectTask(handler, OpType.GetObject); + } + case "PUT": + if (handler.getQueryParameter("tagging") != null) { + return new PutObjectTaggingTask(handler, OpType.PutObjectTagging); + } else if (handler.getQueryParameter("uploadId") != null) { + if (handler.getHeader(S3Constants.S3_COPY_SOURCE_HEADER) != null) { + return new UploadPartTask(handler, OpType.UploadPartCopy); + } + return new UploadPartTask(handler, OpType.UploadPart); + } else { + if (handler.getHeader(S3Constants.S3_COPY_SOURCE_HEADER) != null) { + return new CopyObjectTask(handler, OpType.CopyObject); + } + return new PutObjectTask(handler, OpType.PutObject); + } + case "POST": + if (handler.getQueryParameter("uploads") != null) { + return new CreateMultipartUploadTask(handler, OpType.CreateMultipartUpload); + } else if (handler.getQueryParameter("uploadId") != null) { + return new CompleteMultipartUploadTask(handler, OpType.CompleteMultipartUpload); + } + break; + case "HEAD": + return new HeadObjectTask(handler, OpType.HeadObject); + case "DELETE": + if (handler.getQueryParameter("uploadId") != null) { + return new AbortMultipartUploadTask(handler, OpType.AbortMultipartUpload); + } else if (handler.getQueryParameter("tagging") != null) { + return new DeleteObjectTaggingTask(handler, OpType.DeleteObjectTagging); + } else { + return new DeleteObjectTask(handler, OpType.DeleteObject); + } + default: + return new S3ObjectTask(handler, OpType.Unsupported); + } + return new S3ObjectTask(handler, OpType.Unsupported); + } + } + + private static final class ListPartsTask extends S3ObjectTask { + + public ListPartsTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + final String uploadId = mHandler.getQueryParameter("uploadId"); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); + + AlluxioURI tmpDir = new AlluxioURI(S3RestUtils.getMultipartTemporaryDirForObject( + bucketPath, mHandler.getObject(), uploadId)); + try { + S3RestUtils.checkStatusesForUploadId(mHandler.getMetaFS(), userFs, tmpDir, uploadId); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception((e instanceof FileDoesNotExistException) + ? 
new S3Exception(mHandler.getObject(), S3ErrorCode.NO_SUCH_UPLOAD) : e, + mHandler.getObject(), auditContext); + } + + try { + List statuses = userFs.listStatus(tmpDir); + statuses.sort(new S3RestUtils.URIStatusNameComparator()); + + List parts = new ArrayList<>(); + for (URIStatus status : statuses) { + parts.add(ListPartsResult.Part.fromURIStatus(status)); + } + + ListPartsResult result = new ListPartsResult(); + result.setBucket(bucketPath); + result.setKey(mHandler.getObject()); + result.setUploadId(uploadId); + result.setParts(parts); + return result; + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, tmpDir.getPath(), auditContext); + } + } + }); + } + } // end of ListPartsTask + + private static final class GetObjectTaggingTask extends S3ObjectTask { + + public GetObjectTaggingTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + AlluxioURI uri = new AlluxioURI(objectPath); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); + try { + TaggingData tagData = S3RestUtils.deserializeTags(userFs.getStatus(uri).getXAttr()); + LOG.debug("GetObjectTagging tagData={}", tagData); + return tagData != null ? tagData : new TaggingData(); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + }); + } + } // end of GetObjectTaggingTask + + private static final class PutObjectTaggingTask extends S3ObjectTask { + + private PutObjectTaggingTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + AlluxioURI objectUri = new AlluxioURI(objectPath); + TaggingData tagData = null; + try { + tagData = new XmlMapper().readerFor(TaggingData.class) + .readValue(mHandler.getInputStream()); + } catch (IOException e) { + if (e.getCause() instanceof S3Exception) { + throw S3RestUtils.toObjectS3Exception((S3Exception) e.getCause(), objectPath, + auditContext); + } + auditContext.setSucceeded(false); + throw new S3Exception(e, objectPath, S3ErrorCode.MALFORMED_XML); + } + LOG.debug("PutObjectTagging tagData={}", tagData); + Map xattrMap = new HashMap<>(); + if (tagData != null) { + try { + xattrMap.put(S3Constants.TAGGING_XATTR_KEY, TaggingData.serialize(tagData)); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + try { + 
SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() + .putAllXattr(xattrMap) + .setXattrUpdateStrategy(File.XAttrUpdateStrategy.UNION_REPLACE) + .build(); + userFs.setAttribute(objectUri, attrPOptions); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + return Response.ok().build(); + } + }); + } + } // end of PutObjectTaggingTask + + private static final class GetObjectTask extends S3ObjectTask { + + public GetObjectTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + final String range = mHandler.getHeaderOrDefault("Range", null); + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + AlluxioURI objectUri = new AlluxioURI(objectPath); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { + try { + URIStatus status = userFs.getStatus(objectUri); + FileInStream is = userFs.openFile(status, OpenFilePOptions.getDefaultInstance()); + S3RangeSpec s3Range = S3RangeSpec.Factory.create(range); + RangeFileInStream ris = RangeFileInStream.Factory.create( + is, status.getLength(), s3Range); + + InputStream inputStream; + RateLimiter globalRateLimiter = (RateLimiter) mHandler.getServletContext() + .getAttribute(ProxyWebServer.GLOBAL_RATE_LIMITER_SERVLET_RESOURCE_KEY); + long rate = (long) mHandler.getMetaFS().getConf() + .getInt(PropertyKey.PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB) * Constants.MB; + RateLimiter currentRateLimiter = S3RestUtils.createRateLimiter(rate).orElse(null); + if (currentRateLimiter == null && globalRateLimiter == null) { + inputStream = ris; + } else { + inputStream = new RateLimitInputStream(ris, globalRateLimiter, currentRateLimiter); + } + + Response.ResponseBuilder res = Response.ok(inputStream, + MediaType.APPLICATION_OCTET_STREAM_TYPE) + .lastModified(new Date(status.getLastModificationTimeMs())) + .header(S3Constants.S3_CONTENT_LENGTH_HEADER, + s3Range.getLength(status.getLength())); + + // Check range + if (s3Range.isValid()) { + res.status(Response.Status.PARTIAL_CONTENT) + .header(S3Constants.S3_ACCEPT_RANGES_HEADER, S3Constants.S3_ACCEPT_RANGES_VALUE) + .header(S3Constants.S3_CONTENT_RANGE_HEADER, + s3Range.getRealRange(status.getLength())); + } + + // Check for the object's ETag + String entityTag = S3RestUtils.getEntityTag(status); + if (entityTag != null) { + res.header(S3Constants.S3_ETAG_HEADER, entityTag); + } else { + LOG.debug("Failed to find ETag for object: " + objectPath); + } + + // Check if the object had a specified "Content-Type" + res.type(S3RestUtils.deserializeContentType(status.getXAttr())); + + // Check if object had tags, if so we need to return the count + // in the header "x-amz-tagging-count" + TaggingData tagData = S3RestUtils.deserializeTags(status.getXAttr()); + if (tagData != null) { + int taggingCount = tagData.getTagMap().size(); + if (taggingCount > 0) { + res.header(S3Constants.S3_TAGGING_COUNT_HEADER, taggingCount); + } + } + return res.build(); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + }); + } + } // end of GetObjectTask + + private static 
final class HeadObjectTask extends S3ObjectTask { + + public HeadObjectTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + Preconditions.checkNotNull(mHandler.getBucket(), "required 'bucket' parameter is missing"); + Preconditions.checkNotNull(mHandler.getObject(), "required 'object' parameter is missing"); + + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + AlluxioURI objectUri = new AlluxioURI(objectPath); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { + try { + URIStatus status = userFs.getStatus(objectUri); + if (status.isFolder() && !mHandler.getObject().endsWith(AlluxioURI.SEPARATOR)) { + throw new FileDoesNotExistException(status.getPath() + " is a directory"); + } + Response.ResponseBuilder res = Response.ok() + .lastModified(new Date(status.getLastModificationTimeMs())) + .header(S3Constants.S3_CONTENT_LENGTH_HEADER, + status.isFolder() ? 0 : status.getLength()); + + // Check for the object's ETag + String entityTag = S3RestUtils.getEntityTag(status); + if (entityTag != null) { + res.header(S3Constants.S3_ETAG_HEADER, entityTag); + } else { + LOG.debug("Failed to find ETag for object: " + objectPath); + } + + // Check if the object had a specified "Content-Type" + res.type(S3RestUtils.deserializeContentType(status.getXAttr())); + return res.build(); + } catch (FileDoesNotExistException e) { + // must be null entity (content length 0) for S3A Filesystem + return Response.status(404).entity(null).header("Content-Length", "0").build(); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + }); + } + } // end of HeadObjectTask + + private static final class CopyObjectTask extends PutObjectTask { + + public CopyObjectTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + final String bucket = mHandler.getBucket(); + final String object = mHandler.getObject(); + Preconditions.checkNotNull(bucket, "required 'bucket' parameter is missing"); + Preconditions.checkNotNull(object, "required 'object' parameter is missing"); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + object; + + final String copySourceParam = mHandler.getHeader(S3Constants.S3_COPY_SOURCE_HEADER); + String copySource = !copySourceParam.startsWith(AlluxioURI.SEPARATOR) + ? 
AlluxioURI.SEPARATOR + copySourceParam : copySourceParam; + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { + + if (objectPath.endsWith(AlluxioURI.SEPARATOR)) { + createDirectory(objectPath, userFs, auditContext); + } + AlluxioURI objectUri = new AlluxioURI(objectPath); + + // Populate the xattr Map with the metadata tags if provided + Map<String, ByteString> xattrMap = new HashMap<>(); + final String taggingHeader = mHandler.getHeader(S3Constants.S3_TAGGING_HEADER); + S3RestUtils.populateTaggingInXAttr(xattrMap, taggingHeader, auditContext, objectPath); + + // populate the xAttr map with the "Content-Type" header + final String contentTypeHeader = mHandler.getHeader(S3Constants.S3_CONTENT_TYPE_HEADER); + S3RestUtils.populateContentTypeInXAttr(xattrMap, contentTypeHeader); + + try { + copySource = URLDecoder.decode(copySource, "UTF-8"); + } catch (UnsupportedEncodingException ex) { + throw S3RestUtils.toObjectS3Exception(ex, objectPath, auditContext); + } + URIStatus status = null; + CreateFilePOptions.Builder copyFilePOptionsBuilder = CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE) + .build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .setOverwrite(true) + .setCheckS3BucketPath(true); + + // Handle metadata directive + final String metadataDirective = mHandler.getHeader( + S3Constants.S3_METADATA_DIRECTIVE_HEADER); + if (StringUtils.equals(metadataDirective, S3Constants.Directive.REPLACE.name()) + && xattrMap.containsKey(S3Constants.CONTENT_TYPE_XATTR_KEY)) { + copyFilePOptionsBuilder.putXattr(S3Constants.CONTENT_TYPE_XATTR_KEY, + xattrMap.get(S3Constants.CONTENT_TYPE_XATTR_KEY)); + } else { // defaults to COPY + try { + status = userFs.getStatus(new AlluxioURI(copySource)); + if (status.getFileInfo().getXAttr() != null) { + copyFilePOptionsBuilder.putXattr(S3Constants.CONTENT_TYPE_XATTR_KEY, + ByteString.copyFrom(status.getFileInfo().getXAttr().getOrDefault( + S3Constants.CONTENT_TYPE_XATTR_KEY, + MediaType.APPLICATION_OCTET_STREAM.getBytes(S3Constants.HEADER_CHARSET)))); + } + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + + // Handle tagging directive + final String taggingDirective = mHandler.getHeader( + S3Constants.S3_TAGGING_DIRECTIVE_HEADER); + if (StringUtils.equals(taggingDirective, S3Constants.Directive.REPLACE.name()) + && xattrMap.containsKey(S3Constants.TAGGING_XATTR_KEY)) { + copyFilePOptionsBuilder.putXattr(S3Constants.TAGGING_XATTR_KEY, + xattrMap.get(S3Constants.TAGGING_XATTR_KEY)); + } else { // defaults to COPY + try { + if (status == null) { + status = userFs.getStatus(new AlluxioURI(copySource)); + } + if (status.getFileInfo().getXAttr() != null + && status.getFileInfo().getXAttr() + .containsKey(S3Constants.TAGGING_XATTR_KEY)) { + copyFilePOptionsBuilder.putXattr(S3Constants.TAGGING_XATTR_KEY, + TaggingData.serialize(S3RestUtils.deserializeTags(status.getXAttr()))); + } + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + + String entityTag = copyObject(userFs, auditContext, + objectPath, copySource, copyFilePOptionsBuilder.build()); + return new CopyObjectResult(entityTag, System.currentTimeMillis()); + } + }); + } + } // end of CopyObjectTask + + private static class PutObjectTask extends S3ObjectTask { + // For 
both PutObject and UploadPart + + public PutObjectTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + /** + * Common function for creating an object. + * TODO(lucy): the central overwrite logic here needs to change; + * the current logic introduces unhandled race conditions + * @param objectPath + * @param userFs + * @param createFilePOptions + * @param auditContext + * @return Response + * @throws S3Exception + */ + public Response createObject(String objectPath, FileSystem userFs, + CreateFilePOptions createFilePOptions, S3AuditContext auditContext) + throws S3Exception { + AlluxioURI objectUri = new AlluxioURI(objectPath); + final String decodedLengthHeader = mHandler.getHeader("x-amz-decoded-content-length"); + final String contentLength = mHandler.getHeader("Content-Length"); + try { + MessageDigest md5 = MessageDigest.getInstance("MD5"); + + // The request body can be in the aws-chunked encoding format, or not encoded at all. + // Determine if it's encoded, and then which parts of the stream to read depending on + // the encoding type. + boolean isChunkedEncoding = decodedLengthHeader != null; + long toRead; + InputStream readStream = mHandler.getInputStream(); + if (isChunkedEncoding) { + toRead = Long.parseLong(decodedLengthHeader); + readStream = new ChunkedEncodingInputStream(readStream); + } else { + toRead = Long.parseLong(contentLength); + } + FileOutStream os = userFs.createFile(objectUri, createFilePOptions); + try (DigestOutputStream digestOutputStream = new DigestOutputStream(os, md5)) { + long read = ByteStreams.copy(ByteStreams.limit(readStream, toRead), + digestOutputStream); + if (read < toRead) { + throw new IOException(String.format( + "Failed to read all required bytes from the stream. Read %d/%d", + read, toRead)); + } + } + + byte[] digest = md5.digest(); + String base64Digest = BaseEncoding.base64().encode(digest); + final String contentMD5 = mHandler.getHeader("Content-MD5"); + if (contentMD5 != null && !contentMD5.equals(base64Digest)) { + // The object may be corrupted; delete the written object and return an error. + try { + userFs.delete(objectUri, DeletePOptions.newBuilder().setRecursive(true).build()); + } catch (Exception e2) { + // intentionally continue and return a BAD_DIGEST S3Exception. + } + throw new S3Exception(objectUri.getPath(), S3ErrorCode.BAD_DIGEST); + } + + String entityTag = Hex.encodeHexString(digest); + // persist the ETag via xAttr + // TODO(czhu): try to compute the ETag prior to creating the file + // to reduce total RPC RTT + S3RestUtils.setEntityTag(userFs, objectUri, entityTag); + return Response.ok().header(S3Constants.S3_ETAG_HEADER, entityTag).build(); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + + /** + * Common utility function to create a directory in Alluxio. 
+ * @param objectPath + * @param userFs + * @param auditContext + * @return Response + * @throws S3Exception + */ + public Response createDirectory(String objectPath, FileSystem userFs, + S3AuditContext auditContext) + throws S3Exception { + // Need to create a folder + // TODO(czhu): verify S3 behaviour when ending an object path with a delimiter + // - this is a convenience method for the Alluxio fs which does not have a + // direct counterpart for S3, since S3 does not have "folders" as actual objects + try { + CreateDirectoryPOptions dirOptions = CreateDirectoryPOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .setAllowExists(true) + .setCheckS3BucketPath(true) + .build(); + userFs.createDirectory(new AlluxioURI(objectPath), dirOptions); + } catch (FileAlreadyExistsException e) { + // It is ok if the directory already exists; the user wanted to create it anyway + LOG.warn("Attempting to create a directory which already exists."); + } catch (IOException | AlluxioException e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + return Response.ok().build(); + } + + /** + * Common function for copying from a source path to a target path. + * @param userFs + * @param auditContext + * @param targetPath + * @param sourcePath + * @param copyFilePOption + * @return the entity tag (ETag) + * @throws S3Exception + */ + public String copyObject(FileSystem userFs, S3AuditContext auditContext, + String targetPath, String sourcePath, + CreateFilePOptions copyFilePOption) + throws S3Exception { + AlluxioURI objectUri = new AlluxioURI(targetPath); + if (sourcePath.equals(targetPath)) { + // do not need to copy a file to itself, unless we are changing file attributes + // TODO(czhu): support changing metadata via CopyObject to self, + // verify for UploadPartCopy + auditContext.setSucceeded(false); + throw new S3Exception("Copying an object to itself is invalid.", + targetPath, S3ErrorCode.INVALID_REQUEST); + } + URIStatus status; + try { + status = userFs.getStatus(new AlluxioURI(sourcePath)); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, targetPath, auditContext); + } + final String range = mHandler.getHeaderOrDefault(S3Constants.S3_COPY_SOURCE_RANGE, null); + S3RangeSpec s3Range = S3RangeSpec.Factory.create(range); + try (FileInStream in = userFs.openFile(new AlluxioURI(sourcePath)); + RangeFileInStream ris = RangeFileInStream.Factory.create(in, status.getLength(), + s3Range); + FileOutStream out = userFs.createFile(objectUri, copyFilePOption)) { + MessageDigest md5 = MessageDigest.getInstance("MD5"); + try (DigestOutputStream digestOut = new DigestOutputStream(out, md5)) { + IOUtils.copyLarge(ris, digestOut, new byte[8 * Constants.MB]); + byte[] digest = md5.digest(); + String entityTag = Hex.encodeHexString(digest); + // persist the ETag via xAttr + // TODO(czhu): compute the ETag prior to creating the file to reduce total RPC RTT + S3RestUtils.setEntityTag(userFs, objectUri, entityTag); + return entityTag; + } catch (IOException e) { + try { + out.cancel(); + } catch (Throwable t2) { + e.addSuppressed(t2); + } + throw e; + } + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, targetPath, auditContext); + } + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // PutObject / UploadPart ... 
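Editor's note: as background for the PutObject path, here is a minimal standalone sketch (not Alluxio code; plain JDK classes only) of the digest-while-copying pattern that createObject above uses: the output sink is wrapped so the MD5 state updates as bytes are written, the Base64 digest is compared against the client's Content-MD5 header, and the hex digest is what gets persisted as the ETag. The stream and sink below are stand-ins for Alluxio's request input stream and FileOutStream.

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.util.Base64;

public class DigestCopySketch {
  public static void main(String[] args) throws Exception {
    InputStream readStream =
        new ByteArrayInputStream("hello world".getBytes(StandardCharsets.UTF_8));
    ByteArrayOutputStream sink = new ByteArrayOutputStream(); // stand-in for FileOutStream
    MessageDigest md5 = MessageDigest.getInstance("MD5");
    // Wrap the sink so every byte written also updates the MD5 state.
    try (DigestOutputStream out = new DigestOutputStream(sink, md5)) {
      byte[] buf = new byte[8192];
      for (int n; (n = readStream.read(buf)) != -1; ) {
        out.write(buf, 0, n);
      }
    }
    byte[] digest = md5.digest();
    // The Base64 form is what gets compared against the Content-MD5 request header.
    String base64Digest = Base64.getEncoder().encodeToString(digest);
    // The hex form is what gets persisted as the ETag xAttr.
    StringBuilder etag = new StringBuilder();
    for (byte b : digest) {
      etag.append(String.format("%02x", b));
    }
    System.out.println("Content-MD5 check value: " + base64Digest);
    System.out.println("ETag: " + etag);
  }
}
```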
+ final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + final String bucket = mHandler.getBucket(); + final String object = mHandler.getObject(); + Preconditions.checkNotNull(bucket, "required 'bucket' parameter is missing"); + Preconditions.checkNotNull(object, "required 'object' parameter is missing"); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); + + try (S3AuditContext auditContext = + mHandler.createAuditContext(mOPType.name(), user, bucket, object)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + object; + + if (objectPath.endsWith(AlluxioURI.SEPARATOR)) { + return createDirectory(objectPath, userFs, auditContext); + } + + // Populate the xattr Map with the metadata tags if provided + Map<String, ByteString> xattrMap = new HashMap<>(); + final String taggingHeader = mHandler.getHeader(S3Constants.S3_TAGGING_HEADER); + S3RestUtils.populateTaggingInXAttr(xattrMap, taggingHeader, auditContext, objectPath); + + // populate the xAttr map with the "Content-Type" header + final String contentTypeHeader = mHandler.getHeader(S3Constants.S3_CONTENT_TYPE_HEADER); + S3RestUtils.populateContentTypeInXAttr(xattrMap, contentTypeHeader); + + CreateFilePOptions filePOptions = + CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .putAllXattr(xattrMap).setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .setOverwrite(true) + .setCheckS3BucketPath(true) + .build(); + return createObject(objectPath, userFs, filePOptions, auditContext); + } + }); + } + } // end of PutObjectTask + + private static final class UploadPartTask extends PutObjectTask { + + public UploadPartTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // UploadPart related params + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + final String bucket = mHandler.getBucket(); + final String object = mHandler.getObject(); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); + + final String partNumberStr = mHandler.getQueryParameter("partNumber"); + Integer partNumber = null; + if (StringUtils.isNotEmpty(partNumberStr)) { + try { + partNumber = Integer.parseInt(partNumberStr); + } catch (Exception ex) { + throw new S3Exception(ex, object, S3ErrorCode.INVALID_ARGUMENT); + } + } + final String uploadId = mHandler.getQueryParameter("uploadId"); + Preconditions.checkNotNull(partNumber, "required 'partNumber' parameter is missing"); + Preconditions.checkNotNull(uploadId, "required 'uploadId' parameter is missing"); + + try (S3AuditContext auditContext = + mHandler.createAuditContext(mOPType.name(), user, bucket, object)) { + // This object is part of a multipart upload and should be uploaded into the temporary + // directory first. 
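Editor's note: for orientation, each uploaded part lands under the upload's temporary directory with its part number as the file name, and ListParts/CompleteMultipartUpload later sort the listed parts numerically so that part "10" follows part "2" rather than sorting lexicographically. A small illustrative sketch of that layout and ordering; the temporary-directory path here is hypothetical, and the comparator is only analogous to S3RestUtils.URIStatusNameComparator:

```java
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public class PartLayoutSketch {
  public static void main(String[] args) {
    String tmpDir = "/bucket/folder/object_someUploadId"; // hypothetical temp dir path
    List<String> partNames = Arrays.asList("10", "2", "1");
    // Numeric ordering by part number, not lexicographic ordering of the names.
    partNames.sort(Comparator.comparingInt(Integer::parseInt));
    for (String name : partNames) {
      System.out.println(tmpDir + "/" + name); // prints .../1, .../2, .../10
    }
  }
}
```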
+ String tmpDir = + S3RestUtils.getMultipartTemporaryDirForObject(bucketPath, object, uploadId); + try { + S3RestUtils.checkStatusesForUploadId( + mHandler.getMetaFS(), userFs, new AlluxioURI(tmpDir), uploadId); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception((e instanceof FileDoesNotExistException) + ? new S3Exception(object, S3ErrorCode.NO_SUCH_UPLOAD) : e, + object, auditContext); + } + String objectPath = tmpDir + AlluxioURI.SEPARATOR + partNumber; + // eg: /bucket/folder/object_/ + + // UploadPartCopy with source from another object + if (mHandler.getHeader(S3Constants.S3_COPY_SOURCE_HEADER) != null) { + final String copySourceParam = mHandler.getHeader(S3Constants.S3_COPY_SOURCE_HEADER); + String copySource = !copySourceParam.startsWith(AlluxioURI.SEPARATOR) + ? AlluxioURI.SEPARATOR + copySourceParam : copySourceParam; + try { + copySource = URLDecoder.decode(copySource, "UTF-8"); + } catch (UnsupportedEncodingException ex) { + throw S3RestUtils.toObjectS3Exception(ex, objectPath, auditContext); + } + CreateFilePOptions.Builder copyFilePOptionsBuilder = CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .setOverwrite(true); + String entityTag = copyObject(userFs, auditContext, objectPath, + copySource, copyFilePOptionsBuilder.build()); + return new CopyPartResult(entityTag); + } + // UploadPart with source from http body + CreateFilePOptions filePOptions = + CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .setOverwrite(true) + .build(); + return createObject(objectPath, userFs, filePOptions, auditContext); + } + }); + } + } // end of UploadPartTask + + private static final class CreateMultipartUploadTask extends S3ObjectTask { + + public CreateMultipartUploadTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // CreateMultipartUploadTask ... 
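Editor's note: before the CreateMultipartUpload body below, it may help to see the shape of its upload-ID selection in isolation: a fresh UUID is drawn until no metadata file for that ID exists. A compact standalone sketch, where an in-memory set stands in for the `mHandler.getMetaFS().exists(...)` check used in the real code:

```java
import java.util.HashSet;
import java.util.Set;
import java.util.UUID;

public class UploadIdSketch {
  public static void main(String[] args) {
    // Stand-in for checking the metadata filesystem for an existing upload meta file.
    Set<String> existingMetaFileIds = new HashSet<>();
    String uploadId;
    do {
      uploadId = UUID.randomUUID().toString();
    } while (existingMetaFileIds.contains(uploadId)); // retry on the (vanishingly rare) collision
    System.out.println("uploadId = " + uploadId);
  }
}
```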
+ final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + final String bucket = mHandler.getBucket(); + final String object = mHandler.getObject(); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + object; + + // Populate the xattr Map with the metadata tags if provided + Map<String, ByteString> xattrMap = new HashMap<>(); + + TaggingData tagData = null; + final String taggingHeader = mHandler.getHeader(S3Constants.S3_TAGGING_HEADER); + final String contentTypeHeader = mHandler.getHeader(S3Constants.S3_CONTENT_TYPE_HEADER); + try (S3AuditContext auditContext = mHandler.createAuditContext( + "initiateMultipartUpload", user, bucket, object)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); + if (taggingHeader != null) { // Parse the tagging header if it exists + try { + tagData = S3RestUtils.deserializeTaggingHeader( + taggingHeader, S3Handler.MAX_HEADER_METADATA_SIZE); + xattrMap.put(S3Constants.TAGGING_XATTR_KEY, TaggingData.serialize(tagData)); + } catch (S3Exception e) { + auditContext.setSucceeded(false); + throw e; // rethrow + } catch (IllegalArgumentException e) { + if (e.getCause() instanceof S3Exception) { + throw S3RestUtils.toObjectS3Exception((S3Exception) e.getCause(), objectPath, + auditContext); + } + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + LOG.debug("InitiateMultipartUpload tagData={}", tagData); + } + + try { + // Find an unused UUID + String uploadId; + do { + uploadId = UUID.randomUUID().toString(); + } while (mHandler.getMetaFS().exists( + new AlluxioURI(S3RestUtils.getMultipartMetaFilepathForUploadId(uploadId)))); + + // Create the directory containing the upload parts + AlluxioURI multipartTemporaryDir = new AlluxioURI( + S3RestUtils.getMultipartTemporaryDirForObject(bucketPath, object, uploadId)); + userFs.createDirectory(multipartTemporaryDir, CreateDirectoryPOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .setCheckS3BucketPath(true) + .build()); + + // Create the Alluxio multipart upload metadata file + if (contentTypeHeader != null) { + xattrMap.put(S3Constants.CONTENT_TYPE_XATTR_KEY, + ByteString.copyFrom(contentTypeHeader, S3Constants.HEADER_CHARSET)); + } + xattrMap.put(S3Constants.UPLOADS_BUCKET_XATTR_KEY, + ByteString.copyFrom(mHandler.getBucket(), S3Constants.XATTR_STR_CHARSET)); + xattrMap.put(S3Constants.UPLOADS_OBJECT_XATTR_KEY, + ByteString.copyFrom(mHandler.getObject(), S3Constants.XATTR_STR_CHARSET)); + xattrMap.put(S3Constants.UPLOADS_FILE_ID_XATTR_KEY, ByteString.copyFrom( + Longs.toByteArray(userFs.getStatus(multipartTemporaryDir).getFileId()))); + try (FileOutStream fos = mHandler.getMetaFS().createFile( + new AlluxioURI(S3RestUtils.getMultipartMetaFilepathForUploadId(uploadId)), + CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .putAllXattr(xattrMap) + .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .build() + )) { + // Empty file creation, nothing to do. 
+ } + SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() + .setOwner(user) + .build(); + mHandler.getMetaFS().setAttribute(new AlluxioURI( + S3RestUtils.getMultipartMetaFilepathForUploadId(uploadId)), attrPOptions); + if (S3Handler.MULTIPART_CLEANER_ENABLED) { + MultipartUploadCleaner.apply(mHandler.getMetaFS(), userFs, bucket, object, uploadId); + } + return new InitiateMultipartUploadResult(bucket, object, uploadId); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + }); + } + } // end of CreateMultipartUploadTask + + /** + * CompleteMultipartUploadTask. + */ + public static final class CompleteMultipartUploadTask extends S3ObjectTask { + private final boolean mKeepAliveEnabled = Configuration.getBoolean( + PropertyKey.PROXY_S3_COMPLETE_MULTIPART_UPLOAD_KEEPALIVE_ENABLED); + private final Long mKeepAliveTime = Configuration.getMs( + PropertyKey.PROXY_S3_COMPLETE_MULTIPART_UPLOAD_KEEPALIVE_TIME_INTERVAL); + private String mUploadId; + private FileSystem mUserFs; + private String mBucket; + private String mObject; + + /** + * Create a CompleteMultipartUploadTask. + * @param handler + * @param opType + */ + public CompleteMultipartUploadTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public void handleTaskAsync() { + try { + final String user = mHandler.getUser(); + mBucket = mHandler.getBucket(); + mObject = mHandler.getObject(); + final String uploadId = mHandler.getQueryParameter("uploadId"); + LOG.debug("(bucket: {}, object: {}, uploadId: {}) queuing task...", + mBucket, mObject, uploadId); + HttpServletResponse httpServletResponse = mHandler.getServletResponse(); + + // Set the headers before the response is committed by the keep-alive whitespace flushes + httpServletResponse.setContentType(MediaType.APPLICATION_XML); + + CompletableFuture<Response> respFut = new CompletableFuture<>(); + ExecutorService es = (ExecutorService) mHandler.getServletContext().getAttribute( + S3RequestServlet.PROXY_S3_V2_HEAVY_POOL); + es.submit(() -> { + Response completeMpUploadResponse = mHandler.getS3Task().continueTask(); + respFut.complete(completeMpUploadResponse); + }); + if (mKeepAliveEnabled) { + // Set the status before the response is committed by the keep-alive whitespace flushes + httpServletResponse.setStatus(HttpServletResponse.SC_OK); + long sleepMs = 1000; + while (!respFut.isDone()) { + LOG.debug("(bucket: {}, object: {}, uploadId: {}) sleeping for {}ms...", + mBucket, mObject, uploadId, sleepMs); + try { + Thread.sleep(sleepMs); + } catch (InterruptedException e) { + LOG.error(e.toString()); + } + // TODO(czhu): figure out how to send whitespace characters while still + // returning a correct status code + // - calling getWriter().flush() commits the response (headers, status code, etc.) 
+ // - https://docs.oracle.com/javaee/7/api/javax/servlet/ServletResponse.html#getWriter-- + // periodically sends white space characters to keep the connection from timing out + LOG.debug("(bucket: {}, object: {}, uploadId: {}) sending whitespace...", + mBucket, mObject, uploadId); + httpServletResponse.getWriter().print(" "); + httpServletResponse.getWriter().flush(); + sleepMs = Math.min(2 * sleepMs, mKeepAliveTime); + } + } // otherwise we perform a blocking call on future.get() + + XmlMapper mapper = new XmlMapper(); + try { + Response result = respFut.get(); + if (!mKeepAliveEnabled) { + S3Handler.processResponse(httpServletResponse, result); + } else { + // entity is already a String from a serialized CompleteMultipartUploadResult + String entityStr = result.getEntity().toString(); + httpServletResponse.getWriter().write(entityStr); + } + } catch (Exception e) { + Throwable cause = e.getCause(); + if (cause instanceof S3Exception) { + S3Exception s3Exception = (S3Exception) cause; + httpServletResponse.getWriter().write(mapper.writeValueAsString( + new CompleteMultipartUploadResult(s3Exception.getErrorCode().getCode(), + s3Exception.getErrorCode().getDescription()))); + if (!mKeepAliveEnabled) { + httpServletResponse.setStatus(s3Exception.getErrorCode().getStatus().getStatusCode()); + } + } + LOG.error(ThreadUtils.formatStackTrace(cause)); + } + } catch (Exception e) { + // This try-catch is not intended to handle any exceptions, it is purely + // to ensure that encountered exceptions get logged. + LOG.error("Unhandled exception for {}/{}. {}", mHandler.getBucket(), + mHandler.getObject(), ThreadUtils.formatStackTrace(e)); +// throw e; + } + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // CompleteMultipartUploadTask ... + String objectPath = null; + String objTempPath = null; + mUploadId = mHandler.getQueryParameter("uploadId"); + final String bucket = mHandler.getBucket(); + final String object = mHandler.getObject(); + final String user = mHandler.getUser(); + mUserFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + try { + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); + S3RestUtils.checkPathIsAlluxioDirectory(mUserFs, bucketPath, null, + mHandler.BUCKET_PATH_CACHE); + objectPath = bucketPath + AlluxioURI.SEPARATOR + object; + // Check for existing multipart info files and dirs + AlluxioURI multipartTemporaryDir = new AlluxioURI( + S3RestUtils.getMultipartTemporaryDirForObject(bucketPath, object, mUploadId)); + URIStatus metaStatus; + + try (com.codahale.metrics.Timer.Context ctx = MetricsSystem + .uniformTimer(MetricKey.PROXY_CHECK_UPLOADID_STATUS_LATENCY.getName()).time()) { + metaStatus = S3RestUtils.checkStatusesForUploadId(mHandler.getMetaFS(), mUserFs, + multipartTemporaryDir, mUploadId).get(1); + } catch (Exception e) { + LOG.warn("checkStatusesForUploadId uploadId:{} failed. 
{}", object, + ThreadUtils.formatStackTrace(e)); + throw new S3Exception(objectPath, S3ErrorCode.NO_SUCH_UPLOAD); + } + + // Parse the HTTP request body to get the intended list of parts + CompleteMultipartUploadRequest request = parseCompleteMultipartUploadRequest(objectPath); + + // Check if the requested parts are available + List uploadedParts = validateParts(request, objectPath, multipartTemporaryDir); + + // (re)create the merged object to a temporary object path + LOG.debug("CompleteMultipartUploadTask (bucket: {}, object: {}, uploadId: {}) " + + "combining {} parts...", bucket, object, mUploadId, uploadedParts.size()); + CreateFilePOptions createFileOption = prepareForCreateTempFile(metaStatus); + objTempPath = objectPath + ".temp." + UUID.randomUUID(); + AlluxioURI objectTempUri = new AlluxioURI(objTempPath); + FileOutStream os = mUserFs.createFile(objectTempUri, createFileOption); + MessageDigest md5 = MessageDigest.getInstance("MD5"); + + try (DigestOutputStream digestOutputStream = new DigestOutputStream(os, md5); + com.codahale.metrics.Timer.Context ctx = MetricsSystem + .uniformTimer(MetricKey.PROXY_COMPLETE_MP_UPLOAD_MERGE_LATENCY + .getName()).time()) { + for (URIStatus part : uploadedParts) { + try (FileInStream is = mUserFs.openFile(new AlluxioURI(part.getPath()))) { + ByteStreams.copy(is, digestOutputStream); + } + } + } + // persist the ETag via xAttr + String entityTag = Hex.encodeHexString(md5.digest()); + // TODO(czhu): try to compute the ETag prior to creating the file to reduce total RPC RTT + S3RestUtils.setEntityTag(mUserFs, objectTempUri, entityTag); + // rename the temp file to the target object file path + AlluxioURI objectUri = new AlluxioURI(objectPath); + mUserFs.rename(objectTempUri, objectUri, RenamePOptions.newBuilder() + .setPersist(WriteType.fromProto(createFileOption.getWriteType()).isThrough()) + .setS3SyntaxOptions(S3SyntaxOptions.newBuilder() + .setOverwrite(true) + .setIsMultipartUpload(true) + .build()) + .build()); + + // Remove the temporary directory containing the uploaded parts and the + // corresponding Alluxio S3 API metadata file + try (Timer.Context ctx = MetricsSystem + .uniformTimer(MetricKey.PROXY_CLEANUP_MULTIPART_UPLOAD_LATENCY.getName()).time()) { + removePartsDirAndMPMetaFile(multipartTemporaryDir); + } + return new CompleteMultipartUploadResult(objectPath, bucket, object, entityTag); + } catch (Exception e) { + /* On exception we always check if someone completes the multipart object before us to + achieve idempotency: when a race caused by retry(most cases), the commit of + this object happens at time of rename op, check DefaultFileSystemMaster.rename. + * */ + LOG.warn("Exception during CompleteMultipartUpload:{}", ThreadUtils.formatStackTrace(e)); + if (objectPath != null) { + URIStatus objStatus = checkIfComplete(objectPath); + if (objStatus != null) { + String etag = new String(objStatus.getXAttr() + .getOrDefault(S3Constants.ETAG_XATTR_KEY, new byte[0])); + if (!etag.isEmpty()) { + LOG.info("Check for idempotency, uploadId:{} idempotency check passed.", mUploadId); + return new CompleteMultipartUploadResult(objectPath, bucket, object, etag); + } + LOG.info("Check for idempotency, uploadId:{} object path exists but no etag found.", + mUploadId); + } + } + throw S3RestUtils.toObjectS3Exception(e, object); + } finally { + // Cleanup temp obj path no matter what, if path not exist, ignore + cleanupTempPath(objTempPath); + } + }); + } + + /** + * Prepare CreateFilePOptions for create temp multipart upload file. 
+ * + * @param metaStatus the multipart upload metadata file status + * @return CreateFilePOptions + */ + public CreateFilePOptions prepareForCreateTempFile(URIStatus metaStatus) { + CreateFilePOptions.Builder optionsBuilder = CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .putXattr(PropertyKey.Name.S3_UPLOADS_ID_XATTR_KEY, + ByteString.copyFrom(mUploadId, StandardCharsets.UTF_8)) + .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .setWriteType(S3RestUtils.getS3WriteType()); + // Copy Tagging xAttr if it exists + if (metaStatus.getXAttr().containsKey(S3Constants.TAGGING_XATTR_KEY)) { + optionsBuilder.putXattr(S3Constants.TAGGING_XATTR_KEY, + ByteString.copyFrom(metaStatus.getXAttr().get(S3Constants.TAGGING_XATTR_KEY))); + } + // Copy Content-Type Header xAttr if it exists + if (metaStatus.getXAttr().containsKey(S3Constants.CONTENT_TYPE_XATTR_KEY)) { + optionsBuilder.putXattr(S3Constants.CONTENT_TYPE_XATTR_KEY, + ByteString.copyFrom(metaStatus.getXAttr().get(S3Constants.CONTENT_TYPE_XATTR_KEY))); + } + return optionsBuilder.build(); + } + + /** + * Parse the XML HTTP body into a CompleteMultipartUploadRequest. + * + * @param objectPath + * @return CompleteMultipartUploadRequest + * @throws S3Exception + */ + public CompleteMultipartUploadRequest parseCompleteMultipartUploadRequest(String objectPath) + throws S3Exception { + CompleteMultipartUploadRequest request; + try { + request = new XmlMapper().readerFor(CompleteMultipartUploadRequest.class) + .readValue(mHandler.getInputStream()); + } catch (IllegalArgumentException | IOException e) { + LOG.error("Failed parsing CompleteMultipartUploadRequest:{}", + ThreadUtils.formatStackTrace(e)); + Throwable cause = e.getCause(); + if (cause instanceof S3Exception) { + throw S3RestUtils.toObjectS3Exception((S3Exception) cause, objectPath); + } + if (e instanceof JsonParseException) { + throw new S3Exception( + new InvalidArgumentException("Failed parsing CompleteMultipartUploadRequest."), + objectPath, S3ErrorCode.INVALID_ARGUMENT); + } + throw S3RestUtils.toObjectS3Exception(e, objectPath); + } + return request; + } + + /** + * Validate the parts of this multipart upload request. 
+ * + * @param request + * @param objectPath + * @param multipartTemporaryDir + * @return the list of statuses of the part files + * @throws S3Exception + * @throws IOException + * @throws AlluxioException + */ + public List<URIStatus> validateParts(CompleteMultipartUploadRequest request, + String objectPath, + AlluxioURI multipartTemporaryDir) + throws S3Exception, IOException, AlluxioException { + List<URIStatus> uploadedParts = mUserFs.listStatus(multipartTemporaryDir); + uploadedParts.sort(new S3RestUtils.URIStatusNameComparator()); + if (uploadedParts.size() < request.getParts().size()) { + throw new S3Exception(objectPath, S3ErrorCode.INVALID_PART); + } + Map<Integer, URIStatus> uploadedPartsMap = uploadedParts.stream().collect(Collectors.toMap( + status -> Integer.parseInt(status.getName()), + status -> status + )); + int lastPartNum = request.getParts().get(request.getParts().size() - 1).getPartNumber(); + for (CompleteMultipartUploadRequest.Part part : request.getParts()) { + if (!uploadedPartsMap.containsKey(part.getPartNumber())) { + throw new S3Exception(objectPath, S3ErrorCode.INVALID_PART); + } + if (part.getPartNumber() != lastPartNum // size requirement not applicable to last part + && uploadedPartsMap.get(part.getPartNumber()).getLength() < Configuration.getBytes( + PropertyKey.PROXY_S3_COMPLETE_MULTIPART_UPLOAD_MIN_PART_SIZE)) { + throw new S3Exception(objectPath, S3ErrorCode.ENTITY_TOO_SMALL); + } + } + return uploadedParts; + } + + /** + * Clean up the multipart upload temporary folder holding the part files, + * and the metadata file for this multipart upload. + * + * @param multipartTemporaryDir + * @throws IOException + * @throws AlluxioException + */ + public void removePartsDirAndMPMetaFile(AlluxioURI multipartTemporaryDir) + throws IOException, AlluxioException { + mUserFs.delete(multipartTemporaryDir, + DeletePOptions.newBuilder().setRecursive(true).build()); + mHandler.getMetaFS().delete(new AlluxioURI( + S3RestUtils.getMultipartMetaFilepathForUploadId(mUploadId)), + DeletePOptions.newBuilder().build()); + if (S3Handler.MULTIPART_CLEANER_ENABLED) { + MultipartUploadCleaner.cancelAbort(mHandler.getMetaFS(), mUserFs, + mBucket, mObject, mUploadId); + } + } + + /** + * Clean up the temporary object file for CompleteMultipartUpload. + * + * @param objTempPath + */ + public void cleanupTempPath(String objTempPath) { + if (objTempPath != null) { + try (Timer.Context ctx = MetricsSystem + .uniformTimer(MetricKey.PROXY_CLEANUP_TEMP_MULTIPART_UPLOAD_OBJ_LATENCY + .getName()).time()) { + mUserFs.delete(new AlluxioURI(objTempPath), DeletePOptions.newBuilder().build()); + } catch (Exception e) { + LOG.warn("Failed to clean up temp path:{}, {}", objTempPath, e.getMessage()); + } + } + } + + /** + * On any exception, check with the Master whether an object file + * bearing the same upload ID has already been completed. 
+ * + * @param objectPath + * @return the status of the existing object completed by a previous CompleteMultipartUpload call + */ + public URIStatus checkIfComplete(String objectPath) { + try { + URIStatus objStatus = mUserFs.getStatus(new AlluxioURI(objectPath)); + String uploadId = new String(objStatus.getXAttr() + .getOrDefault(PropertyKey.Name.S3_UPLOADS_ID_XATTR_KEY, new byte[0])); + if (objStatus.isCompleted() && StringUtils.equals(uploadId, mUploadId)) { + return objStatus; + } + } catch (IOException | AlluxioException ex) { + // cannot validate whether any previous attempt has succeeded + LOG.warn("Check for objectPath:{} failed: {}; cannot determine the completion status.", + objectPath, ex.getMessage()); + return null; + } + return null; + } + } // end of CompleteMultipartUploadTask + + private static final class AbortMultipartUploadTask extends S3ObjectTask { + + public AbortMultipartUploadTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // AbortMultipartUploadTask ... + Preconditions.checkNotNull(mHandler.getBucket(), "required 'bucket' parameter is missing"); + Preconditions.checkNotNull(mHandler.getObject(), "required 'object' parameter is missing"); + + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser( + user, mHandler.getMetaFS()); + final String uploadId = mHandler.getQueryParameter("uploadId"); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + AlluxioURI multipartTemporaryDir = new AlluxioURI(S3RestUtils + .getMultipartTemporaryDirForObject(bucketPath, mHandler.getObject(), uploadId)); + try (S3AuditContext auditContext = mHandler.createAuditContext( + "abortMultipartUpload", user, mHandler.getBucket(), mHandler.getObject())) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); + try { + S3RestUtils.checkStatusesForUploadId(mHandler.getMetaFS(), + userFs, multipartTemporaryDir, uploadId); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception((e instanceof FileDoesNotExistException) + ? new S3Exception(mHandler.getObject(), S3ErrorCode.NO_SUCH_UPLOAD) : e, + mHandler.getObject(), auditContext); + } + + try { + userFs.delete(multipartTemporaryDir, + DeletePOptions.newBuilder().setRecursive(true).build()); + mHandler.getMetaFS().delete(new AlluxioURI( + S3RestUtils.getMultipartMetaFilepathForUploadId(uploadId)), + DeletePOptions.newBuilder().build()); + if (S3Handler.MULTIPART_CLEANER_ENABLED) { + MultipartUploadCleaner.cancelAbort(mHandler.getMetaFS(), userFs, + mHandler.getBucket(), mHandler.getObject(), uploadId); + } + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + + // Note: the normal response for S3 delete key is 204 NO_CONTENT, not 200 OK + return Response.Status.NO_CONTENT; + }); + } + } // end of AbortMultipartUploadTask + + private static final class DeleteObjectTaggingTask extends S3ObjectTask { + + public DeleteObjectTaggingTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // DeleteObjectTaggingTask ... 
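Editor's note: the tagging tasks in this file rely on two different xAttr update strategies, and it is easy to miss the contrast. PutObjectTagging (earlier in this file) writes the serialized tags with UNION_REPLACE, while DeleteObjectTaggingTask below writes an empty value under the same key with DELETE_KEYS, which, as used here, removes the key rather than storing the empty value. A minimal sketch of the two option builds, using only builder calls that appear in this change; the `File` import is assumed to be the same proto class the surrounding code references, and the literal key is a placeholder for S3Constants.TAGGING_XATTR_KEY:

```java
import alluxio.grpc.SetAttributePOptions;
import alluxio.proto.journal.File;

import com.google.protobuf.ByteString;

public class TaggingXattrOptionsSketch {
  // PutObjectTagging: merge/overwrite the tagging key with the serialized tag data.
  static SetAttributePOptions putTagging(ByteString serializedTags) {
    return SetAttributePOptions.newBuilder()
        .putXattr("s3_tagging", serializedTags) // placeholder key
        .setXattrUpdateStrategy(File.XAttrUpdateStrategy.UNION_REPLACE)
        .build();
  }

  // DeleteObjectTagging: the listed key is removed; the empty value itself is ignored.
  static SetAttributePOptions deleteTagging() {
    return SetAttributePOptions.newBuilder()
        .putXattr("s3_tagging", ByteString.copyFrom(new byte[0])) // placeholder key
        .setXattrUpdateStrategy(File.XAttrUpdateStrategy.DELETE_KEYS)
        .build();
  }
}
```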
+ Preconditions.checkNotNull(mHandler.getBucket(), "required 'bucket' parameter is missing"); + Preconditions.checkNotNull(mHandler.getObject(), "required 'object' parameter is missing"); + + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + LOG.debug("DeleteObjectTagging object={}", mHandler.getObject()); + Map<String, ByteString> xattrMap = new HashMap<>(); + xattrMap.put(S3Constants.TAGGING_XATTR_KEY, ByteString.copyFrom(new byte[0])); + SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() + .putAllXattr(xattrMap).setXattrUpdateStrategy(File.XAttrUpdateStrategy.DELETE_KEYS) + .build(); + try (S3AuditContext auditContext = mHandler.createAuditContext( + "deleteObjectTags", user, mHandler.getBucket(), mHandler.getObject())) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); + try { + userFs.setAttribute(new AlluxioURI(objectPath), attrPOptions); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + // Note: the normal response for S3 delete key is 204 NO_CONTENT, not 200 OK + return Response.Status.NO_CONTENT; + }); + } + } // end of DeleteObjectTaggingTask + + private static final class DeleteObjectTask extends S3ObjectTask { + + public DeleteObjectTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // DeleteObjectTask ... + Preconditions.checkNotNull(mHandler.getBucket(), "required 'bucket' parameter is missing"); + Preconditions.checkNotNull(mHandler.getObject(), "required 'object' parameter is missing"); + + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + // Delete the object. + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly(Configuration + .get(PropertyKey.PROXY_S3_DELETE_TYPE).equals(Constants.S3_DELETE_IN_ALLUXIO_ONLY)) + .build(); + try (S3AuditContext auditContext = mHandler.createAuditContext( + "deleteObject", user, mHandler.getBucket(), mHandler.getObject())) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); + try { + userFs.delete(new AlluxioURI(objectPath), options); + } catch (FileDoesNotExistException | DirectoryNotEmptyException e) { + // Intentionally do nothing; this is fine and should still result in a 204 response. + // This is the same response behavior as AWS S3. 
+ } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + // Note: the normal response for S3 delete key is 204 NO_CONTENT, not 200 OK + return Response.Status.NO_CONTENT; + }); + } + } // end of DeleteObjectTask +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java new file mode 100644 index 000000000000..747ffc4874cc --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java @@ -0,0 +1,123 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy.s3; + +import alluxio.AlluxioURI; +import alluxio.Constants; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.util.ThreadUtils; +import alluxio.web.ProxyWebServer; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.concurrent.ExecutorService; +import javax.servlet.AsyncContext; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import javax.ws.rs.core.Response; + +/** + * New architecture Request Servlet for handling s3 requests + * in replacement of JAX-RS. + */ +public class S3RequestServlet extends HttpServlet { + private static final long serialVersionUID = 2966302125671934038L; + public static final String SERVICE_PREFIX = "s3"; + public static final String S3_V2_SERVICE_PATH_PREFIX = Constants.REST_API_PREFIX + + AlluxioURI.SEPARATOR + SERVICE_PREFIX; + private static final Logger LOG = LoggerFactory.getLogger(S3RequestServlet.class); + /* (Experimental for new architecture enabled by PROXY_S3_V2_VERSION_ENABLED) + * Processing threadpools for group of requests (for now, distinguish between + * light-weighted metadata-centric requests and heavy io requests */ + public static final String PROXY_S3_V2_LIGHT_POOL = "Proxy S3 V2 Light Pool"; + public static final String PROXY_S3_V2_HEAVY_POOL = "Proxy S3 V2 Heavy Pool"; + + /** + * Implementation to serve the HttpServletRequest and returns HttpServletResponse. 
+ * @param request the {@link HttpServletRequest} object that + * contains the request the client made of + * the servlet + * + * @param response the {@link HttpServletResponse} object that + * contains the response the servlet returns + * to the client + * + * @throws ServletException + * @throws IOException + */ + @Override + public void service(HttpServletRequest request, + HttpServletResponse response) throws ServletException, IOException { + String target = request.getRequestURI(); + if (!target.startsWith(S3_V2_SERVICE_PATH_PREFIX)) { + return; + } + S3Handler s3Handler = null; + try { + s3Handler = S3Handler.createHandler(target, request, response); + } catch (Exception ex) { + Response errorResponse = S3ErrorResponse.createErrorResponse(ex, ""); + S3Handler.processResponse(response, errorResponse); + return; + } + request.setAttribute(ProxyWebServer.S3_HANDLER_ATTRIBUTE, s3Handler); + // Handle request async + if (Configuration.getBoolean(PropertyKey.PROXY_S3_V2_ASYNC_PROCESSING_ENABLED)) { + S3BaseTask.OpTag opTag = s3Handler.getS3Task().mOPType.getOpTag(); + ExecutorService es = (ExecutorService) (opTag == S3BaseTask.OpTag.LIGHT + ? getServletContext().getAttribute(PROXY_S3_V2_LIGHT_POOL) + : getServletContext().getAttribute(PROXY_S3_V2_HEAVY_POOL)); + + final AsyncContext asyncCtx = request.startAsync(); + final S3Handler s3HandlerAsync = s3Handler; + es.submit(() -> { + try { + serveRequest(s3HandlerAsync); + } catch (Throwable th) { + try { + ((HttpServletResponse) asyncCtx.getResponse()).sendError( + HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + } catch (Throwable sendErrorEx) { + LOG.error("Unexpected exception for {}/{}. {}", s3HandlerAsync.getBucket(), + s3HandlerAsync.getObject(), ThreadUtils.formatStackTrace(sendErrorEx)); + } + } finally { + asyncCtx.complete(); + } + }); + } + // Handle request in current context + else { + serveRequest(s3Handler); + } + } + + /** + * Core place to call S3 task's core API logic handling + * function w/o exception handling. 
+ * @param s3Handler + * @throws IOException + */ + public void serveRequest(S3Handler s3Handler) throws IOException { + if (s3Handler.getS3Task().getOPType() == S3BaseTask.OpType.CompleteMultipartUpload) { + s3Handler.getS3Task().handleTaskAsync(); + return; + } + Response resp = s3Handler.getS3Task().continueTask(); + S3Handler.processResponse(s3Handler.getServletResponse(), resp); + } +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java index be75ca2ac456..d897642e646c 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java @@ -31,6 +31,7 @@ import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.DeletePOptions; import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.PMode; import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.XAttrPropagationStrategy; @@ -41,10 +42,13 @@ import com.fasterxml.jackson.dataformat.xml.XmlMapper; import com.google.common.base.Preconditions; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; import com.google.common.io.BaseEncoding; import com.google.common.io.ByteStreams; import com.google.common.net.InetAddresses; import com.google.common.primitives.Longs; +import com.google.common.util.concurrent.RateLimiter; import com.google.protobuf.ByteString; import org.apache.commons.codec.binary.Hex; import org.apache.commons.io.IOUtils; @@ -65,6 +69,7 @@ import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -105,7 +110,17 @@ public final class S3RestServiceHandler { public static final String BUCKET_PARAM = "{bucket}/"; /* Object is after bucket in the URL path */ public static final String OBJECT_PARAM = "{bucket}/{object:.+}"; - + public static final int BUCKET_PATH_CACHE_SIZE = 65536; + /* BUCKET_PATH_CACHE caches bucket path during specific period. + BUCKET_PATH_CACHE.put(bucketPath,true) means bucket path exists. + BUCKET_PATH_CACHE.put(bucketPath,false) plays the same effect + as BUCKET_PATH_CACHE.remove(bucketPath). */ + private static final Cache BUCKET_PATH_CACHE = CacheBuilder.newBuilder() + .maximumSize(BUCKET_PATH_CACHE_SIZE) + .expireAfterWrite( + Configuration.global().getMs(PropertyKey.PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS), + TimeUnit.MILLISECONDS) + .build(); private final FileSystem mMetaFS; private final InstancedConfiguration mSConf; @@ -125,6 +140,8 @@ public final class S3RestServiceHandler { private final Pattern mBucketInvalidSuffixPattern; private final Pattern mBucketValidNamePattern; + private final RateLimiter mGlobalRateLimiter; + /** * Constructs a new {@link S3RestServiceHandler}. * @@ -167,6 +184,9 @@ public S3RestServiceHandler(@Context ServletContext context) .build() ); } + + mGlobalRateLimiter = (RateLimiter) context.getAttribute( + ProxyWebServer.GLOBAL_RATE_LIMITER_SERVLET_RESOURCE_KEY); } /** @@ -209,6 +229,7 @@ public Response listAllMyBuckets() { // debatable (?) potentially breaks backcompat(?) 
.filter(URIStatus::isFolder) .collect(Collectors.toList()); + buckets.forEach((uri) -> BUCKET_PATH_CACHE.put(uri.getPath(), true)); return new ListAllMyBucketsResult(buckets); } }); @@ -229,7 +250,8 @@ public Response headBucket( final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mMetaFS); try (S3AuditContext auditContext = createAuditContext("headBucket", user, bucket, null)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); } return Response.ok().build(); }); @@ -286,7 +308,7 @@ public Response getBucket(@PathParam("bucket") final String bucket, if (policyStatus != null) { throw new S3Exception(bucket, new S3ErrorCode( S3ErrorCode.INTERNAL_ERROR.getCode(), - "GetBucketpolicyStatus is not currently supported.", + "GetBucketPolicyStatus is not currently supported.", S3ErrorCode.INTERNAL_ERROR.getStatus())); } @@ -295,7 +317,7 @@ public Response getBucket(@PathParam("bucket") final String bucket, final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mMetaFS); try (S3AuditContext auditContext = createAuditContext("listObjects", user, bucket, null)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext, BUCKET_PATH_CACHE); if (tagging != null) { // GetBucketTagging AlluxioURI uri = new AlluxioURI(path); try { @@ -487,7 +509,8 @@ public Response createBucket(@PathParam("bucket") final String bucket, try (S3AuditContext auditContext = createAuditContext("createBucket", user, bucket, null)) { if (tagging != null) { // PutBucketTagging - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); try { TaggingData tagData = new XmlMapper().readerFor(TaggingData.class) .readValue(is); @@ -569,6 +592,7 @@ public Response createBucket(@PathParam("bucket") final String bucket, } catch (Exception e) { throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); } + BUCKET_PATH_CACHE.put(bucketPath, true); return Response.Status.OK; } }); @@ -600,7 +624,8 @@ public Response deleteBucket(@PathParam("bucket") final String bucket, try (S3AuditContext auditContext = createAuditContext("deleteBucket", user, bucket, null)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); if (tagging != null) { // DeleteBucketTagging LOG.debug("DeleteBucketTagging bucket={}", bucketPath); @@ -619,15 +644,16 @@ public Response deleteBucket(@PathParam("bucket") final String bucket, } // Delete the bucket. - DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly(Configuration - .get(PropertyKey.PROXY_S3_DELETE_TYPE) - .equals(Constants.S3_DELETE_IN_ALLUXIO_ONLY)) + DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly( + Configuration.get(PropertyKey.PROXY_S3_DELETE_TYPE) + .equals(Constants.S3_DELETE_IN_ALLUXIO_ONLY)) .build(); try { userFs.delete(new AlluxioURI(bucketPath), options); } catch (Exception e) { throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); } + BUCKET_PATH_CACHE.put(bucketPath, false); return Response.Status.NO_CONTENT; } }); @@ -637,6 +663,7 @@ public Response deleteBucket(@PathParam("bucket") final String bucket, * Uploads an object or part of an object in multipart upload. 
* @param contentMD5 the optional Base64 encoded 128-bit MD5 digest of the object * @param copySourceParam the URL-encoded source path to copy the new file from + * @param copySourceRange the http range header * @param decodedLength the length of the content when in aws-chunked encoding * @param contentLength the total length of the request body * @param contentTypeParam the content type of the request body @@ -659,6 +686,8 @@ public Response deleteBucket(@PathParam("bucket") final String bucket, public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final String contentMD5, @HeaderParam(S3Constants.S3_COPY_SOURCE_HEADER) final String copySourceParam, + @HeaderParam(S3Constants.S3_COPY_SOURCE_RANGE) + final String copySourceRange, @HeaderParam("x-amz-decoded-content-length") final String decodedLength, @HeaderParam(S3Constants.S3_METADATA_DIRECTIVE_HEADER) @@ -709,7 +738,8 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); try (S3AuditContext auditContext = createAuditContext("createObject", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); String objectPath = bucketPath + AlluxioURI.SEPARATOR + object; if (objectPath.endsWith(AlluxioURI.SEPARATOR)) { @@ -725,6 +755,7 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin .setGroupBits(Bits.ALL) .setOtherBits(Bits.NONE).build()) .setAllowExists(true) + .setCheckS3BucketPath(true) .build(); userFs.createDirectory(new AlluxioURI(objectPath), dirOptions); } catch (FileAlreadyExistsException e) { @@ -818,6 +849,8 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin .setOtherBits(Bits.NONE).build()) .setWriteType(S3RestUtils.getS3WriteType()) .putAllXattr(xattrMap).setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .setCheckS3BucketPath(true) + .setOverwrite(true) .build(); // not copying from an existing file @@ -837,11 +870,6 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin } else { toRead = Long.parseLong(contentLength); } - try { - S3RestUtils.deleteExistObject(userFs, objectUri); - } catch (IOException | AlluxioException e) { - throw S3RestUtils.toObjectS3Exception(e, objectUri.getPath(), auditContext); - } FileOutStream os = userFs.createFile(objectUri, filePOptions); try (DigestOutputStream digestOutputStream = new DigestOutputStream(os, md5)) { long read = ByteStreams.copy(ByteStreams.limit(readStream, toRead), @@ -881,6 +909,7 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin } else { // CopyObject or UploadPartCopy String copySource = !copySourceParam.startsWith(AlluxioURI.SEPARATOR) ? 
AlluxioURI.SEPARATOR + copySourceParam : copySourceParam; + S3RangeSpec s3Range = S3RangeSpec.Factory.create(copySourceRange); try { copySource = URLDecoder.decode(copySource, "UTF-8"); } catch (UnsupportedEncodingException ex) { @@ -892,7 +921,10 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin .setMode(PMode.newBuilder() .setOwnerBits(Bits.ALL) .setGroupBits(Bits.ALL) - .setOtherBits(Bits.NONE).build()); + .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .setCheckS3BucketPath(true) + .setOverwrite(true); // Handle metadata directive if (metadataDirective == S3Constants.Directive.REPLACE && filePOptions.getXattrMap().containsKey(S3Constants.CONTENT_TYPE_XATTR_KEY)) { @@ -939,16 +971,21 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin throw new S3Exception("Copying an object to itself is invalid.", objectPath, S3ErrorCode.INVALID_REQUEST); } + // Avoid an NPE when the source status has not been fetched yet try { - S3RestUtils.deleteExistObject(userFs, objectUri); - } catch (IOException | AlluxioException e) { - throw S3RestUtils.toObjectS3Exception(e, objectUri.getPath(), auditContext); + if (status == null) { + status = userFs.getStatus(new AlluxioURI(copySource)); + } + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); } try (FileInStream in = userFs.openFile(new AlluxioURI(copySource)); + RangeFileInStream ris = RangeFileInStream.Factory.create(in, status.getLength(), + s3Range); FileOutStream out = userFs.createFile(objectUri, copyFilePOptionsBuilder.build())) { MessageDigest md5 = MessageDigest.getInstance("MD5"); try (DigestOutputStream digestOut = new DigestOutputStream(out, md5)) { - IOUtils.copyLarge(in, digestOut, new byte[8 * Constants.MB]); + IOUtils.copyLarge(ris, digestOut, new byte[8 * Constants.MB]); byte[] digest = md5.digest(); String entityTag = Hex.encodeHexString(digest); // persist the ETag via xAttr @@ -1007,7 +1044,8 @@ public Response initiateMultipartUpload( TaggingData tagData = null; try (S3AuditContext auditContext = createAuditContext("initiateMultipartUpload", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); if (taggingHeader != null) { // Parse the tagging header if it exists for PutObject try { tagData = S3RestUtils.deserializeTaggingHeader(taggingHeader, mMaxHeaderMetadataSize); @@ -1044,7 +1082,8 @@ public Response initiateMultipartUpload( .setOwnerBits(Bits.ALL) .setGroupBits(Bits.ALL) .setOtherBits(Bits.NONE).build()) - .setWriteType(S3RestUtils.getS3WriteType()).build()); + .setWriteType(S3RestUtils.getS3WriteType()) + .build()); // Create the Alluxio multipart upload metadata file if (contentType != null) { @@ -1069,7 +1108,7 @@ public Response initiateMultipartUpload( .putAllXattr(xattrMap) .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) .build() - ); + ).close(); SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() .setOwner(user) .build(); @@ -1191,7 +1230,8 @@ private Response listParts(final String bucket, String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); try (S3AuditContext auditContext = createAuditContext("listParts", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); AlluxioURI tmpDir = new
AlluxioURI( S3RestUtils.getMultipartTemporaryDirForObject(bucketPath, object, uploadId)); @@ -1199,8 +1239,8 @@ private Response listParts(final String bucket, S3RestUtils.checkStatusesForUploadId(mMetaFS, userFs, tmpDir, uploadId); } catch (Exception e) { throw S3RestUtils.toObjectS3Exception((e instanceof FileDoesNotExistException) - ? new S3Exception(object, S3ErrorCode.NO_SUCH_UPLOAD) : e, - object, auditContext); + ? new S3Exception(object, S3ErrorCode.NO_SUCH_UPLOAD) : e, + object, auditContext); } try { @@ -1239,11 +1279,22 @@ private Response getObject(final String bucket, createAuditContext("getObject", user, bucket, object)) { try { URIStatus status = userFs.getStatus(objectUri); - FileInStream is = userFs.openFile(objectUri); + FileInStream is = userFs.openFile(status, OpenFilePOptions.getDefaultInstance()); S3RangeSpec s3Range = S3RangeSpec.Factory.create(range); RangeFileInStream ris = RangeFileInStream.Factory.create(is, status.getLength(), s3Range); - Response.ResponseBuilder res = Response.ok(ris) + InputStream inputStream; + long rate = + (long) mSConf.getInt(PropertyKey.PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB) + * Constants.MB; + RateLimiter currentRateLimiter = S3RestUtils.createRateLimiter(rate).orElse(null); + if (currentRateLimiter == null && mGlobalRateLimiter == null) { + inputStream = ris; + } else { + inputStream = new RateLimitInputStream(ris, mGlobalRateLimiter, currentRateLimiter); + } + + Response.ResponseBuilder res = Response.ok(inputStream) .lastModified(new Date(status.getLastModificationTimeMs())) .header(S3Constants.S3_CONTENT_LENGTH_HEADER, s3Range.getLength(status.getLength())); @@ -1294,7 +1345,8 @@ private Response getObjectTags(final String bucket, AlluxioURI uri = new AlluxioURI(objectPath); try (S3AuditContext auditContext = createAuditContext("getObjectTags", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); try { TaggingData tagData = S3RestUtils.deserializeTags(userFs.getStatus(uri).getXAttr()); LOG.debug("GetObjectTagging tagData={}", tagData); @@ -1351,7 +1403,7 @@ private void abortMultipartUpload(String bucket, String object, new AlluxioURI(S3RestUtils.getMultipartTemporaryDirForObject(bucketPath, object, uploadId)); try (S3AuditContext auditContext = createAuditContext("abortMultipartUpload", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, BUCKET_PATH_CACHE); try { S3RestUtils.checkStatusesForUploadId(mMetaFS, userFs, multipartTemporaryDir, uploadId); } catch (Exception e) { @@ -1381,12 +1433,13 @@ private void deleteObject(String bucket, String object) throws S3Exception { String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); // Delete the object. 
String objectPath = bucketPath + AlluxioURI.SEPARATOR + object; - DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly(Configuration - .get(PropertyKey.PROXY_S3_DELETE_TYPE).equals(Constants.S3_DELETE_IN_ALLUXIO_ONLY)) + DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly( + Configuration.get(PropertyKey.PROXY_S3_DELETE_TYPE) + .equals(Constants.S3_DELETE_IN_ALLUXIO_ONLY)) .build(); try (S3AuditContext auditContext = createAuditContext("deleteObject", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, BUCKET_PATH_CACHE); try { userFs.delete(new AlluxioURI(objectPath), options); } catch (FileDoesNotExistException | DirectoryNotEmptyException e) { @@ -1412,7 +1465,7 @@ private void deleteObjectTags(String bucket, String object) .build(); try (S3AuditContext auditContext = createAuditContext("deleteObjectTags", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, BUCKET_PATH_CACHE); try { userFs.setAttribute(new AlluxioURI(objectPath), attrPOptions); } catch (Exception e) { @@ -1465,7 +1518,7 @@ private S3AuditContext createAuditContext(String command, String user, @Nullable String bucket, @Nullable String object) { // Audit log may be enabled during runtime AsyncUserAccessAuditLogWriter auditLogWriter = null; - if (Configuration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) { + if (Configuration.getBoolean(PropertyKey.PROXY_AUDIT_LOGGING_ENABLED)) { auditLogWriter = mAsyncAuditLogWriter; } S3AuditContext auditContext = new S3AuditContext(auditLogWriter); diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java index 96b324212e0b..e65f627c1c0d 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java @@ -21,6 +21,7 @@ import alluxio.exception.AccessControlException; import alluxio.exception.AlluxioException; import alluxio.exception.DirectoryNotEmptyException; +import alluxio.exception.ExceptionMessage; import alluxio.exception.FileAlreadyExistsException; import alluxio.exception.FileDoesNotExistException; import alluxio.exception.InvalidPathException; @@ -36,10 +37,13 @@ import alluxio.security.authentication.AuthenticatedClientUser; import alluxio.security.user.ServerUserState; import alluxio.util.SecurityUtils; +import alluxio.util.ThreadUtils; import com.fasterxml.jackson.dataformat.xml.XmlMapper; import com.google.common.annotations.VisibleForTesting; +import com.google.common.cache.Cache; import com.google.common.primitives.Longs; +import com.google.common.util.concurrent.RateLimiter; import com.google.protobuf.ByteString; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -56,10 +60,13 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.TreeMap; +import java.util.regex.Pattern; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.security.auth.Subject; +import javax.servlet.http.HttpServletRequest; import javax.ws.rs.container.ContainerRequestContext; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.MultivaluedMap; @@ -123,7 +130,11 @@ public static Response call(String resource, 
S3RestUtils.RestCallable cal XmlMapper mapper = new XmlMapper(); return Response.ok(mapper.writeValueAsString(result)).build(); } catch (Exception e) { - LOG.warn("Error invoking REST endpoint for {}:\n{}", resource, e.getMessage()); + String errOutputMsg = e.getMessage(); + if (StringUtils.isEmpty(errOutputMsg)) { + errOutputMsg = ThreadUtils.formatStackTrace(e); + } + LOG.warn("Error invoking REST endpoint for {}:\n{}", resource, errOutputMsg); return S3ErrorResponse.createErrorResponse(e, resource); } } @@ -243,6 +254,10 @@ public static S3Exception toObjectS3Exception(Exception exception, String resour } catch (DirectoryNotEmptyException e) { return new S3Exception(e, resource, S3ErrorCode.PRECONDITION_FAILED); } catch (FileDoesNotExistException e) { + if (Pattern.matches(ExceptionMessage.BUCKET_DOES_NOT_EXIST.getMessage(".*"), + e.getMessage())) { + return new S3Exception(e, resource, S3ErrorCode.NO_SUCH_BUCKET); + } return new S3Exception(e, resource, S3ErrorCode.NO_SUCH_KEY); } catch (AccessControlException e) { return new S3Exception(e, resource, S3ErrorCode.ACCESS_DENIED_ERROR); @@ -281,8 +296,8 @@ public static void checkPathIsAlluxioDirectory(FileSystem fs, String bucketPath, try { URIStatus status = fs.getStatus(new AlluxioURI(bucketPath)); if (!status.isFolder()) { - throw new InvalidPathException("Bucket " + bucketPath + " is not a valid Alluxio directory."); + throw new FileDoesNotExistException( + ExceptionMessage.BUCKET_DOES_NOT_EXIST.getMessage(bucketPath)); } } catch (Exception e) { if (auditContext != null) { @@ -292,6 +307,25 @@ public static void checkPathIsAlluxioDirectory(FileSystem fs, String bucketPath, } } + /** + * Checks whether a path in Alluxio is a directory. + * + * @param fs instance of {@link FileSystem} + * @param bucketPath the full bucket path + * @param auditContext the audit context for exceptions + * @param bucketPathCache the cache of bucket paths verified within the configured time period + */ + public static void checkPathIsAlluxioDirectory(FileSystem fs, String bucketPath, + @Nullable S3AuditContext auditContext, + Cache<String, Boolean> bucketPathCache) + throws S3Exception { + if (Boolean.TRUE.equals(bucketPathCache.getIfPresent(bucketPath))) { + return; + } + checkPathIsAlluxioDirectory(fs, bucketPath, auditContext); + bucketPathCache.put(bucketPath, true); + } + /** * Fetches and returns the corresponding {@link URIStatus} for both * the multipart upload temp directory and the Alluxio S3 metadata file. @@ -394,7 +428,9 @@ public static FileSystem createFileSystemForUser( final Subject subject = new Subject(); subject.getPrincipals().add(new User(user)); - return FileSystem.Factory.get(subject, fs.getConf()); + // Use the local conf to create the filesystem rather than fs.getConf(), + // because the fs conf may have been changed by the merged cluster conf. + return FileSystem.Factory.get(subject, Configuration.global()); } /** @@ -539,6 +575,38 @@ public static String getUser(String authorization, ContainerRequestContext reque } }
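The BUCKET_DOES_NOT_EXIST handling above matches a parameterized message template against a concrete exception message. A small sketch of that technique (the template wording is an assumption, not the exact ExceptionMessage text):

import java.util.regex.Pattern;

public final class MessageTemplateMatchSketch {
  // Hypothetical template; the real one lives in alluxio.exception.ExceptionMessage.
  private static final String TEMPLATE = "Bucket %s does not exist";

  public static void main(String[] args) {
    // Substituting ".*" for the parameter turns the template into a regex.
    String regex = String.format(TEMPLATE, ".*");
    System.out.println(Pattern.matches(regex, "Bucket /s3/b1 does not exist")); // true
    System.out.println(Pattern.matches(regex, "Path /tmp/f does not exist"));   // false
  }
}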
+ /** + * Gets the username from the header info of an HttpServletRequest. + * + * @param authorization the HTTP Authorization header value + * @param request the HTTP servlet request + * @return the user name + * @throws S3Exception + */ + public static String getUser(String authorization, HttpServletRequest request) + throws S3Exception { + if (S3RestUtils.isAuthenticationEnabled(Configuration.global())) { + return getUserFromSignature(request); + } + try { + return getUserFromAuthorization(authorization, Configuration.global()); + } catch (RuntimeException e) { + throw new S3Exception(new S3ErrorCode(S3ErrorCode.INTERNAL_ERROR.getCode(), + e.getMessage(), S3ErrorCode.INTERNAL_ERROR.getStatus())); + } + } + + private static String getUserFromSignature(HttpServletRequest request) + throws S3Exception { + AwsSignatureProcessor signatureProcessor = new AwsSignatureProcessor(request); + Authenticator authenticator = Authenticator.Factory.create(Configuration.global()); + AwsAuthInfo authInfo = signatureProcessor.getAuthInfo(); + if (authenticator.isAuthenticated(authInfo)) { + return authInfo.getAccessID(); + } + throw new S3Exception(authInfo.toString(), S3ErrorCode.INVALID_IDENTIFIER); + } + /** * Get username from parsed header info. * @@ -608,8 +676,69 @@ public static String getUserFromAuthorization(String authorization, AlluxioConfi } /** - * Comparator based on uri name, treat uri name as a Long number. + * Populate xattr with content type info from header. + * @param xattrMap the xattr map to populate + * @param contentTypeHeader the Content-Type header value + */ + public static void populateContentTypeInXAttr(Map<String, ByteString> xattrMap, + String contentTypeHeader) { + if (contentTypeHeader != null) { + xattrMap.put(S3Constants.CONTENT_TYPE_XATTR_KEY, + ByteString.copyFrom(contentTypeHeader, S3Constants.HEADER_CHARSET)); + } + } + + /** + * Populate xattr map with tagging info from tagging header. + * @param xattrMap the xattr map to populate + * @param taggingHeader the tagging header value + * @param auditContext the audit context for exceptions + * @param objectPath the object path + * @throws S3Exception + */ + public static void populateTaggingInXAttr(Map<String, ByteString> xattrMap, String taggingHeader, + S3AuditContext auditContext, String objectPath) + throws S3Exception { + TaggingData tagData = null; + if (taggingHeader != null) { // Parse the tagging header if it exists for PutObject + try { + tagData = S3RestUtils.deserializeTaggingHeader( + taggingHeader, S3Handler.MAX_HEADER_METADATA_SIZE); + } catch (IllegalArgumentException e) { + Throwable cause = e.getCause(); + if (cause instanceof S3Exception) { + throw S3RestUtils.toObjectS3Exception((S3Exception) cause, objectPath, + auditContext); + } + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + LOG.debug("tagData={}", tagData); + // Populate the xattr Map with the metadata tags if provided + if (tagData != null) { + try { + xattrMap.put(S3Constants.TAGGING_XATTR_KEY, TaggingData.serialize(tagData)); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + } + + /** + * Creates a rate limiter for the given rate. + * @param rate bytes per second + * @return empty if rate <= 0 + */ + public static Optional<RateLimiter> createRateLimiter(long rate) { + if (rate <= 0) { + return Optional.empty(); + } + return Optional.of(RateLimiter.create(rate)); + }
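A brief sketch of how the Optional-returning factory above composes with a read path. RateLimitInputStream's actual implementation is not shown in this diff, so the throttling wrapper below is a hypothetical stand-in that treats one permit as one byte and only paces the bulk read path:

import com.google.common.util.concurrent.RateLimiter;
import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Optional;

public final class ThrottledReadSketch {
  // Hypothetical wrapper: pace bulk reads against every non-null limiter.
  static final class Throttled extends FilterInputStream {
    private final RateLimiter[] mLimiters;

    Throttled(InputStream in, RateLimiter... limiters) {
      super(in);
      mLimiters = limiters;
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
      int n = in.read(b, off, len);
      if (n > 0) {
        for (RateLimiter limiter : mLimiters) {
          if (limiter != null) {
            limiter.acquire(n); // one permit per byte read
          }
        }
      }
      return n;
    }
  }

  public static void main(String[] args) throws IOException {
    // A rate <= 0 disables limiting, mirroring createRateLimiter's contract;
    // a null limiter slot stands in for "no global limiter configured".
    RateLimiter perRequest = Optional.of(RateLimiter.create(4096)).orElse(null);
    try (InputStream in = new Throttled(
        new ByteArrayInputStream(new byte[8192]), perRequest, null)) {
      byte[] buf = new byte[1024];
      int total = 0;
      int n;
      while ((n = in.read(buf, 0, buf.length)) != -1) {
        total += n;
      }
      System.out.println(total + " bytes read, paced at ~4 KB/s"); // ~2s wall clock
    }
  }
}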
+ /** + * Comparator based on uri name, treating the uri name as a Long number. + */ public static class URIStatusNameComparator implements Comparator<URIStatus>, Serializable { private static final long serialVersionUID = 733270188584155565L; diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/AwsSignatureProcessor.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/AwsSignatureProcessor.java index 4bcaea62dec9..3cb80a615828 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/AwsSignatureProcessor.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/AwsSignatureProcessor.java @@ -25,7 +25,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.HashMap; import java.util.Map; +import javax.servlet.http.HttpServletRequest; import javax.ws.rs.container.ContainerRequestContext; /** @@ -39,7 +41,8 @@ public class AwsSignatureProcessor { LoggerFactory.getLogger(AwsSignatureProcessor.class); private static final String AUTHORIZATION = "Authorization"; - private final ContainerRequestContext mContext; + private ContainerRequestContext mContext; + private HttpServletRequest mServletRequest; /** * Create a new {@link AwsSignatureProcessor}. @@ -50,18 +53,41 @@ public AwsSignatureProcessor(ContainerRequestContext context) { mContext = context; } + /** + * Create a new {@link AwsSignatureProcessor} with an HttpServletRequest + * as the source of request info. + * Used by the new architecture in {@link alluxio.proxy.s3.S3RequestServlet}. + * + * @param request the HTTP servlet request + */ + public AwsSignatureProcessor(HttpServletRequest request) { + mServletRequest = request; + } + /** * Extract signature info from request. * @return SignatureInfo * @throws S3Exception */ public SignatureInfo parseSignature() throws S3Exception { - Map<String, String> headers = S3RestUtils.fromMultiValueToSingleValueMap( - mContext.getHeaders(), true); - String authHeader = headers.get(AUTHORIZATION); - String dateHeader = headers.get(S3_SIGN_DATE); - Map<String, String> queryParameters = S3RestUtils.fromMultiValueToSingleValueMap( - mContext.getUriInfo().getQueryParameters(), false); + Map<String, String> queryParameters; + String authHeader; + String dateHeader; + if (mContext != null) { + Map<String, String> headers = S3RestUtils.fromMultiValueToSingleValueMap( + mContext.getHeaders(), true); + authHeader = headers.get(AUTHORIZATION); + dateHeader = headers.get(S3_SIGN_DATE); + queryParameters = S3RestUtils.fromMultiValueToSingleValueMap( + mContext.getUriInfo().getQueryParameters(), false); + } else { + authHeader = mServletRequest.getHeader(AUTHORIZATION); + dateHeader = mServletRequest.getHeader(S3_SIGN_DATE); + queryParameters = new HashMap<>(); + for (Map.Entry<String, String[]> entry : mServletRequest.getParameterMap().entrySet()) { + queryParameters.put(entry.getKey(), entry.getValue()[0]); + } + } SignatureInfo signatureInfo; if ((signatureInfo = @@ -88,8 +114,13 @@ public AwsAuthInfo getAuthInfo() throws S3Exception { SignatureInfo signatureInfo = parseSignature(); String stringToSign = ""; if (signatureInfo.getVersion() == SignatureInfo.Version.V4) { - stringToSign = - StringToSignProducer.createSignatureBase(signatureInfo, mContext); + if (mContext != null) { + stringToSign = + StringToSignProducer.createSignatureBase(signatureInfo, mContext); + } else { + stringToSign = + StringToSignProducer.createSignatureBase(signatureInfo, mServletRequest); + } } String awsAccessId = signatureInfo.getAwsAccessId(); // ONLY validate aws access id when needed.
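The servlet-based signing path above, together with the StringToSignProducer changes that follow, reduces multi-valued servlet data to the single-valued, case-tolerant maps the signer expects. A compact sketch of both reductions (plain Java, no Alluxio types):

import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

public final class ServletValueReductionSketch {
  // getParameterMap() is String -> String[]; signing needs one value per key,
  // so the first value wins.
  static Map<String, String> firstValues(Map<String, String[]> parameterMap) {
    Map<String, String> result = new HashMap<>();
    for (Map.Entry<String, String[]> entry : parameterMap.entrySet()) {
      result.put(entry.getKey(), entry.getValue()[0]);
    }
    return result;
  }

  public static void main(String[] args) {
    Map<String, String[]> params = new HashMap<>();
    params.put("uploadId", new String[] {"abc", "ignored"});
    System.out.println(firstValues(params)); // {uploadId=abc}

    // HTTP header names are case-insensitive; a TreeMap ordered by
    // compareToIgnoreCase makes lookups ignore case too.
    Map<String, String> headers = new TreeMap<>(String::compareToIgnoreCase);
    headers.put("X-Amz-Date", "20230101T000000Z");
    System.out.println(headers.get("x-amz-date")); // found despite the case change
  }
}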
diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/StringToSignProducer.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/StringToSignProducer.java index d47b977a95b5..31f9c3ea171c 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/StringToSignProducer.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/StringToSignProducer.java @@ -36,11 +36,16 @@ import java.time.temporal.ChronoUnit; import java.util.ArrayList; import java.util.Collections; +import java.util.Enumeration; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.StringJoiner; +import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import javax.servlet.http.HttpServletRequest; import javax.ws.rs.container.ContainerRequestContext; /** @@ -83,6 +88,26 @@ public static String createSignatureBase( context.getUriInfo().getQueryParameters(), false)); } + /** + * Convert signature info to strToSign. + * + * @param signatureInfo the parsed signature info + * @param request the HTTP servlet request + * @return signature string + * @throws Exception + */ + public static String createSignatureBase( + SignatureInfo signatureInfo, + HttpServletRequest request + ) throws Exception { + return createSignatureBase(signatureInfo, + request.getScheme(), + request.getMethod(), + request.getRequestURI(), + getHeaders(request), + getParameterMap(request)); + } + /** * Convert request info to strToSign. * @@ -139,6 +164,36 @@ public static String createSignatureBase( return strToSign.toString(); } + /** + * Gets all headers from the given HTTP request; the resulting map ignores case in its keys. + * @param request the HTTP servlet request + * @return a case-insensitive map from header name to value + */ + private static Map<String, String> getHeaders(HttpServletRequest request) { + Map<String, String> result = new TreeMap<>(String::compareToIgnoreCase); + Enumeration<String> headerNames = request.getHeaderNames(); + if (headerNames != null) { + while (headerNames.hasMoreElements()) { + String name = headerNames.nextElement(); + String value = request.getHeader(name); + result.put(name, value); + } + } + return result; + } + + /** + * Gets all parameters from the given HTTP request; + * if there are multiple values for the same key, the first one is taken. + * @param request the HTTP servlet request + * @return a map from parameter name to its first value + */ + private static Map<String, String> getParameterMap(HttpServletRequest request) { + return request.getParameterMap().entrySet() + .stream() + .collect(Collectors.toMap(Entry::getKey, e -> e.getValue()[0])); + } + /** * Compute a hash for provided string.
* @param payload @@ -175,7 +230,6 @@ public static String buildCanonicalRequest( String canonicalUri = getCanonicalUri("/", uri); String canonicalQueryStr = getQueryParamString(queryParams); - System.out.println(canonicalQueryStr); StringBuilder canonicalHeaders = new StringBuilder(); diff --git a/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java b/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java index f876d736de7a..16b8992fc787 100644 --- a/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java +++ b/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java @@ -21,10 +21,19 @@ import alluxio.metrics.MetricsSystem; import alluxio.proxy.ProxyProcess; import alluxio.proxy.s3.CompleteMultipartUploadHandler; +import alluxio.proxy.s3.S3BaseTask; +import alluxio.proxy.s3.S3Handler; +import alluxio.proxy.s3.S3RequestServlet; import alluxio.proxy.s3.S3RestExceptionMapper; +import alluxio.proxy.s3.S3RestUtils; +import alluxio.util.ThreadFactoryUtils; import alluxio.util.io.PathUtils; +import com.google.common.base.Preconditions; import com.google.common.base.Stopwatch; +import com.google.common.util.concurrent.RateLimiter; +import org.eclipse.jetty.server.HttpChannel; +import org.eclipse.jetty.server.Request; import org.eclipse.jetty.servlet.ServletHolder; import org.glassfish.jersey.server.ResourceConfig; import org.glassfish.jersey.servlet.ServletContainer; @@ -34,6 +43,8 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.util.Collections; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import javax.annotation.concurrent.NotThreadSafe; @@ -55,10 +66,26 @@ public final class ProxyWebServer extends WebServer { public static final String SERVER_CONFIGURATION_RESOURCE_KEY = "Server Configuration"; public static final String ALLUXIO_PROXY_AUDIT_LOG_WRITER_KEY = "Alluxio Proxy Audit Log Writer"; + public static final String GLOBAL_RATE_LIMITER_SERVLET_RESOURCE_KEY = "Global Rate Limiter"; + private final RateLimiter mGlobalRateLimiter; private final FileSystem mFileSystem; - private AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter; + public static final String S3_HANDLER_ATTRIBUTE = "Proxy S3 Handler Attribute"; + + class ProxyListener implements HttpChannel.Listener { + public void onComplete(Request request) + { + S3Handler s3Hdlr = (S3Handler) request.getAttribute(S3_HANDLER_ATTRIBUTE); + if (s3Hdlr != null) { + ProxyWebServer.logAccess(s3Hdlr.getServletRequest(), s3Hdlr.getServletResponse(), + s3Hdlr.getStopwatch(), s3Hdlr.getS3Task() != null + ? s3Hdlr.getS3Task().getOPType() : S3BaseTask.OpType.Unknown); + } else { + LOG.info("[ACCESSLOG] Request:{} onComplete.", request); + } + } + } /** * Creates a new instance of {@link ProxyWebServer}. 
@@ -72,21 +99,29 @@ public ProxyWebServer(String serviceName, InetSocketAddress address, super(serviceName, address); // REST configuration - ResourceConfig config = new ResourceConfig().packages("alluxio.proxy", "alluxio.proxy.s3", - "alluxio.proxy.s3.logging") + String[] packages = {"alluxio.proxy", "alluxio.proxy.s3", + "alluxio.proxy.s3.logging"}; + ResourceConfig config = new ResourceConfig().packages(packages) .register(JacksonProtobufObjectMapperProvider.class) .register(S3RestExceptionMapper.class); mFileSystem = FileSystem.Factory.create(Configuration.global()); + long rate = + (long) Configuration.getInt(PropertyKey.PROXY_S3_GLOBAL_READ_RATE_LIMIT_MB) * Constants.MB; + mGlobalRateLimiter = S3RestUtils.createRateLimiter(rate).orElse(null); - if (Configuration.getBoolean(PropertyKey.PROXY_AUDIT_LOGGING_ENABLED)) { - mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("PROXY_AUDIT_LOG"); - mAsyncAuditLogWriter.start(); - MetricsSystem.registerGaugeIfAbsent( - MetricKey.PROXY_AUDIT_LOG_ENTRIES_SIZE.getName(), - () -> mAsyncAuditLogWriter != null - ? mAsyncAuditLogWriter.getAuditLogEntriesSize() : -1); - } + /** + * The audit logger will be running all the time, and an operation checks whether + * to enable audit logs in {@link alluxio.proxy.s3.S3RestServiceHandler#createAuditContext} and + * {@link alluxio.proxy.s3.S3Handler#createAuditContext}. So the audit log can be turned on/off + * at runtime by updating the property key. + */ + mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("PROXY_AUDIT_LOG"); + mAsyncAuditLogWriter.start(); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.PROXY_AUDIT_LOG_ENTRIES_SIZE.getName(), + () -> mAsyncAuditLogWriter != null + ? mAsyncAuditLogWriter.getAuditLogEntriesSize() : -1); ServletContainer servlet = new ServletContainer(config) { private static final long serialVersionUID = 7756010860672831556L; @@ -100,25 +135,95 @@ public void init() throws ServletException { getServletContext().setAttribute(STREAM_CACHE_SERVLET_RESOURCE_KEY, new StreamCache(Configuration.getMs(PropertyKey.PROXY_STREAM_CACHE_TIMEOUT_MS))); getServletContext().setAttribute(ALLUXIO_PROXY_AUDIT_LOG_WRITER_KEY, mAsyncAuditLogWriter); + if (mGlobalRateLimiter != null) { + getServletContext().setAttribute(GLOBAL_RATE_LIMITER_SERVLET_RESOURCE_KEY, + mGlobalRateLimiter); + } } @Override public void service(final ServletRequest req, final ServletResponse res) - throws ServletException, IOException { + throws ServletException, IOException { Stopwatch stopWatch = Stopwatch.createStarted(); super.service(req, res); if ((req instanceof HttpServletRequest) && (res instanceof HttpServletResponse)) { HttpServletRequest httpReq = (HttpServletRequest) req; HttpServletResponse httpRes = (HttpServletResponse) res; - logAccess(httpReq, httpRes, stopWatch); + logAccess(httpReq, httpRes, stopWatch, null); } } }; - ServletHolder servletHolder = new ServletHolder("Alluxio Proxy Web Service", servlet); - mServletContextHandler - .addServlet(servletHolder, PathUtils.concatPath(Constants.REST_API_PREFIX, "*")); - // TODO(czhu): Move S3 API logging out of CompleteMultipartUploadHandler into a logging handler + + if (Configuration.getBoolean(PropertyKey.PROXY_S3_V2_VERSION_ENABLED)) { + super.getServerConnector().addBean(new ProxyListener()); + ServletHolder s3ServletHolder = new ServletHolder("Alluxio Proxy V2 S3 Service", + new S3RequestServlet() { + @Override + public void init() throws ServletException { + super.init(); + getServletContext().setAttribute(ALLUXIO_PROXY_SERVLET_RESOURCE_KEY,
proxyProcess); + getServletContext() + .setAttribute(FILE_SYSTEM_SERVLET_RESOURCE_KEY, mFileSystem); + getServletContext().setAttribute(STREAM_CACHE_SERVLET_RESOURCE_KEY, + new StreamCache(Configuration.getMs(PropertyKey.PROXY_STREAM_CACHE_TIMEOUT_MS))); + getServletContext().setAttribute(ALLUXIO_PROXY_AUDIT_LOG_WRITER_KEY, + mAsyncAuditLogWriter); + getServletContext().setAttribute(PROXY_S3_V2_LIGHT_POOL, createLightThreadPool()); + getServletContext().setAttribute(PROXY_S3_V2_HEAVY_POOL, createHeavyThreadPool()); + } + }); + mServletContextHandler + .addServlet(s3ServletHolder, PathUtils.concatPath(Constants.REST_API_PREFIX, "*")); + return; + } addHandler(new CompleteMultipartUploadHandler(mFileSystem, Constants.REST_API_PREFIX)); + ServletHolder rsServletHolder = new ServletHolder("Alluxio Proxy Web Service", servlet); + mServletContextHandler + .addServlet(rsServletHolder, PathUtils.concatPath(Constants.REST_API_PREFIX, "*")); + } + + private ThreadPoolExecutor createLightThreadPool() { + int lightCorePoolSize = Configuration.getInt( + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_CORE_THREAD_NUMBER); + Preconditions.checkArgument(lightCorePoolSize > 0, + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_CORE_THREAD_NUMBER.getName() + + " must be a positive integer."); + int lightMaximumPoolSize = Configuration.getInt( + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_MAXIMUM_THREAD_NUMBER); + Preconditions.checkArgument(lightMaximumPoolSize >= lightCorePoolSize, + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_MAXIMUM_THREAD_NUMBER.getName() + + " must be greater than or equal to the value of " + + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_CORE_THREAD_NUMBER.getName()); + int lightPoolQueueSize = Configuration.getInt( + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_QUEUE_SIZE); + Preconditions.checkArgument(lightPoolQueueSize > 0, + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_QUEUE_SIZE.getName() + + " must be a positive integer."); + return new ThreadPoolExecutor(lightCorePoolSize, lightMaximumPoolSize, 0, + TimeUnit.SECONDS, new ArrayBlockingQueue<>(lightPoolQueueSize), + ThreadFactoryUtils.build("S3-LIGHTPOOL-%d", false)); + } + + private ThreadPoolExecutor createHeavyThreadPool() { + int heavyCorePoolSize = Configuration.getInt( + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_CORE_THREAD_NUMBER); + Preconditions.checkArgument(heavyCorePoolSize > 0, + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_CORE_THREAD_NUMBER.getName() + + " must be a positive integer."); + int heavyMaximumPoolSize = Configuration.getInt( + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_MAXIMUM_THREAD_NUMBER); + Preconditions.checkArgument(heavyMaximumPoolSize >= heavyCorePoolSize, + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_MAXIMUM_THREAD_NUMBER.getName() + + " must be greater than or equal to the value of " + + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_CORE_THREAD_NUMBER.getName()); + int heavyPoolQueueSize = Configuration.getInt( + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_QUEUE_SIZE); + Preconditions.checkArgument(heavyPoolQueueSize > 0, + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_QUEUE_SIZE.getName() + + " must be a positive integer."); + return new ThreadPoolExecutor(heavyCorePoolSize, heavyMaximumPoolSize, 0, + TimeUnit.SECONDS, new ArrayBlockingQueue<>(heavyPoolQueueSize), + ThreadFactoryUtils.build("S3-HEAVYPOOL-%d", false)); } @Override @@ -136,18 +241,19 @@ public void stop() throws Exception { * @param request * @param response * @param stopWatch + * @param opType */ public static void logAccess(HttpServletRequest request, HttpServletResponse 
response, - Stopwatch stopWatch) { + Stopwatch stopWatch, S3BaseTask.OpType opType) { String contentLenStr = "None"; if (request.getHeader("x-amz-decoded-content-length") != null) { contentLenStr = request.getHeader("x-amz-decoded-content-length"); } else if (request.getHeader("Content-Length") != null) { contentLenStr = request.getHeader("Content-Length"); } - String accessLog = String.format("[ACCESSLOG] Request:%s - Status:%d " + String accessLog = String.format("[ACCESSLOG] %s Request:%s - Status:%d " + "- ContentLength:%s - Elapsed(ms):%d", - request, response.getStatus(), + (opType == null ? "" : opType), request, response.getStatus(), contentLenStr, stopWatch.elapsed(TimeUnit.MILLISECONDS)); if (LOG.isDebugEnabled()) { String requestHeaders = Collections.list(request.getHeaderNames()).stream() diff --git a/core/server/proxy/src/test/java/alluxio/proxy/s3/RateLimitInputStreamTest.java b/core/server/proxy/src/test/java/alluxio/proxy/s3/RateLimitInputStreamTest.java new file mode 100644 index 000000000000..93353f167f30 --- /dev/null +++ b/core/server/proxy/src/test/java/alluxio/proxy/s3/RateLimitInputStreamTest.java @@ -0,0 +1,117 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy.s3; + +import static alluxio.Constants.KB; +import static alluxio.Constants.MB; + +import com.google.common.util.concurrent.RateLimiter; +import org.apache.commons.io.IOUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.UUID; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.FutureTask; +import java.util.stream.Collectors; + +public class RateLimitInputStreamTest { + + private byte[] mData; + + @Before + public void init() throws IOException { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(MB); + int count = 0; + while (count < MB) { + byte[] bytes = UUID.randomUUID().toString().getBytes(); + byteArrayOutputStream.write(bytes); + count += bytes.length; + } + mData = Arrays.copyOf(byteArrayOutputStream.toByteArray(), MB); + } + + @Test + public void testSingleThreadRead() throws IOException { + Random random = new Random(); + for (int i = 1; i <= 5; i++) { + long rate1 = (random.nextInt(4) + 1) * 100 * KB; + long rate2 = (random.nextInt(4) + 1) * 100 * KB; + ByteArrayInputStream inputStream = new ByteArrayInputStream(mData); + RateLimitInputStream rateLimitInputStream = new RateLimitInputStream(inputStream, + RateLimiter.create(rate1), RateLimiter.create(rate2)); + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(MB); + long start = System.currentTimeMillis(); + IOUtils.copy(rateLimitInputStream, byteArrayOutputStream, KB); + long end = System.currentTimeMillis(); + long duration = end - start; + long expectedDuration = MB / Math.min(rate1, rate2) * 
1000; + Assert.assertTrue(duration >= expectedDuration && duration <= expectedDuration + 1000); + Assert.assertArrayEquals(mData, byteArrayOutputStream.toByteArray()); + } + } + + private void testMultiThreadRead(long globalRate, long rate, int threadNum) { + long totalSize = (long) threadNum * mData.length; + RateLimiter globalRateLimiter = RateLimiter.create(globalRate); + ExecutorService threadPool = Executors.newFixedThreadPool(threadNum); + List<FutureTask<byte[]>> tasks = new ArrayList<>(); + for (int i = 1; i <= threadNum; i++) { + tasks.add(new FutureTask<>(() -> { + ByteArrayInputStream inputStream = new ByteArrayInputStream(mData); + RateLimitInputStream rateLimitInputStream = new RateLimitInputStream(inputStream, + RateLimiter.create(rate), globalRateLimiter); + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(MB); + IOUtils.copy(rateLimitInputStream, byteArrayOutputStream, KB); + return byteArrayOutputStream.toByteArray(); + })); + } + long start = System.currentTimeMillis(); + tasks.forEach(threadPool::submit); + List<byte[]> results; + try { + results = tasks.stream().map(task -> { + try { + return task.get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }).collect(Collectors.toList()); + } finally { + threadPool.shutdownNow(); + } + long end = System.currentTimeMillis(); + long duration = end - start; + long expectedDuration = totalSize / Math.min(globalRate, (long) threadNum * rate) * 1000; + Assert.assertTrue(duration >= expectedDuration && duration <= expectedDuration + 1000); + results.forEach(bytes -> Assert.assertArrayEquals(mData, bytes)); + } + + @Test + public void testMultiThreadReadWithBiggerGlobalRate() { + testMultiThreadRead(400 * KB, 100 * KB, 3); + } + + @Test + public void testMultiThreadReadWithSmallerGlobalRate() { + testMultiThreadRead(100 * KB, 200 * KB, 3); + } +}
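The duration assertions in the tests above follow a simple model; a worked sketch of the arithmetic, with values taken from the smaller-global-rate case:

public final class ThrottleTimingSketch {
  public static void main(String[] args) {
    long kb = 1024;
    long mb = 1024 * kb;
    // Three streams each limited to 200 KB/s share a 100 KB/s global limiter.
    int streams = 3;
    long perStreamRate = 200 * kb;
    long globalRate = 100 * kb;
    long totalBytes = streams * mb; // each stream reads 1 MB
    // The effective aggregate rate is the tighter of the two constraints.
    long effectiveRate = Math.min(globalRate, streams * perStreamRate);
    System.out.println("expected ~" + (totalBytes / effectiveRate) + "s"); // ~30s
  }
}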
diff --git a/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java b/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java index 602feb6bac11..69e367b21fc6 100644 --- a/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java +++ b/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java @@ -13,6 +13,7 @@ import alluxio.AlluxioURI; import alluxio.Constants; +import alluxio.ProjectConstants; import alluxio.RestUtils; import alluxio.RuntimeConstants; import alluxio.client.file.FileSystem; @@ -29,6 +30,7 @@ import alluxio.master.block.BlockId; import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; +import alluxio.util.CommonUtils; import alluxio.util.ConfigurationUtils; import alluxio.util.FormatUtils; import alluxio.util.LogUtils; @@ -47,6 +49,7 @@ import alluxio.wire.WorkerWebUIInit; import alluxio.wire.WorkerWebUILogs; import alluxio.wire.WorkerWebUIMetrics; +import alluxio.wire.WorkerWebUIOperations; import alluxio.wire.WorkerWebUIOverview; import alluxio.worker.block.BlockStoreMeta; import alluxio.worker.block.BlockWorker; @@ -106,6 +109,7 @@ public final class AlluxioWorkerRestServiceHandler { // endpoints public static final String GET_INFO = "info"; + public static final String GET_OPERATIONS = "operations"; // webui endpoints // TODO(william): DRY up these endpoints public static final String WEBUI_INIT = "webui_init"; @@ -163,10 +167,62 @@ public Response getInfo(@QueryParam(QUERY_RAW_CONFIGURATION) final Boolean rawCo .setRpcAddress(mWorkerProcess.getRpcAddress().toString()) .setStartTimeMs(mWorkerProcess.getStartTimeMs()) .setTierCapacity(getTierCapacityInternal()).setTierPaths(getTierPathsInternal()) - .setUptimeMs(mWorkerProcess.getUptimeMs()).setVersion(RuntimeConstants.VERSION); + .setUptimeMs(mWorkerProcess.getUptimeMs()) + .setVersion(RuntimeConstants.VERSION) + .setRevision(ProjectConstants.REVISION); }, Configuration.global()); } + /** + * Gets the current active operations count in the worker. + * + * @return the response + */ + @GET + @Path(GET_OPERATIONS) + public Response getActiveOperations() { + return RestUtils.call(() -> { + WorkerWebUIOperations response = new WorkerWebUIOperations(); + /* + * This contains running operations in: + * 1. Worker RPC thread pool, for ongoing RPCs + * 2. GrpcExecutors.BLOCK_READER_EXECUTOR, for block readers + * 3. GrpcExecutors.BLOCK_READER_SERIALIZED_RUNNER_EXECUTOR, for replying to the client + * 4. GrpcExecutors.BLOCK_WRITER_EXECUTOR, for block writers + * + * So this is the number of operations actively running in the thread pools. + * In order to know the total number of accepted but not yet finished requests, we would + * also need to consider the thread pool task queues. + */ + long operations = MetricsSystem.counter( + MetricKey.WORKER_ACTIVE_OPERATIONS.getName()).getCount(); + /* + * Only the RPC thread pool can have a meaningful length. The other block reader/writer + * thread pools all have 0/1 queue length and create threads immediately when there is + * a request. So we only need to consider the RPC pool queue length for idleness. + */ + String workerRpcPoolSizeGaugeName = MetricKey.WORKER_RPC_QUEUE_LENGTH.getName(); + long rpcQueueSize = getGaugeValue(workerRpcPoolSizeGaugeName); + response.setOperationCount(operations) + .setRpcQueueLength(rpcQueueSize); + LOG.debug("Checking worker activity: {}", response); + return response; + }, Configuration.global()); + } + + // Cast to long to safely handle all gauges + private static long getGaugeValue(String gaugeName) { + try { + Gauge gauge = MetricsSystem.METRIC_REGISTRY.gauge(gaugeName, null); + // Carefully cast here because Integer cannot be cast to Long directly + return ((Number) gauge.getValue()).longValue(); + } catch (Exception e) { + LOG.error("Incorrect gauge name {}. Available names are: {}", + gaugeName, MetricsSystem.METRIC_REGISTRY.getGauges().keySet(), e); + return 0; + } + }
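The Number-based cast in getGaugeValue above sidesteps a real pitfall: a boxed Integer cannot be cast straight to Long. A short demonstration:

public final class GaugeCastSketch {
  public static void main(String[] args) {
    Object gaugeValue = 42; // gauges may report Integer, Long, etc.
    // long bad = (Long) gaugeValue;               // would throw ClassCastException
    long good = ((Number) gaugeValue).longValue(); // works for any numeric type
    System.out.println(good);
  }
}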
/** * Gets Web UI initialization data. * @@ -227,7 +283,8 @@ public Response getWebUIOverview() { response.setCapacityBytes(FormatUtils.getSizeFromBytes(capacityBytes)) .setUsedBytes(FormatUtils.getSizeFromBytes(usedBytes)).setUsageOnTiers(usageOnTiers) .setBlockCount(Long.toString(storeMeta.getNumberOfBlocks())) - .setVersion(RuntimeConstants.VERSION); + .setVersion(RuntimeConstants.VERSION) + .setRevision(ProjectConstants.REVISION); List<UIStorageDir> storageDirs = new ArrayList<>(storeMeta.getCapacityBytesOnDirs().size()); for (Pair<String, String> tierAndDirPath : storeMeta.getCapacityBytesOnDirs().keySet()) { @@ -407,7 +464,8 @@ public Response getWebUILogs(@DefaultValue("") @QueryParam("path") String reques @QueryParam("end") String requestEnd, @DefaultValue("20") @QueryParam("limit") String requestLimit) { return RestUtils.call(() -> { - FilenameFilter filenameFilter = (dir, name) -> name.toLowerCase().endsWith(".log"); + FilenameFilter filenameFilter = (dir, name) -> + Constants.LOG_FILE_PATTERN.matcher(name.toLowerCase()).matches(); WorkerWebUILogs response = new WorkerWebUILogs(); if (!Configuration.getBoolean(PropertyKey.WEB_FILE_INFO_ENABLED)) { @@ -534,23 +592,30 @@ public Response getWebUILogs(@DefaultValue("") @QueryParam("path") String reques public Response getWebUIConfiguration() { return RestUtils.call(() -> { WorkerWebUIConfiguration response = new WorkerWebUIConfiguration(); - response.setWhitelist(mBlockWorker.getWhiteList()); + response.setWhitelist(mBlockWorker.getWhiteList()); + alluxio.wire.Configuration conf = mBlockWorker.getConfiguration( + GetConfigurationPOptions.newBuilder().setRawValue(true).build()); TreeSet<Triple<String, String, String>> sortedProperties = new TreeSet<>(); - Set<String> alluxioConfExcludes = Sets.newHashSet(PropertyKey.WORKER_WHITELIST.toString()); - for (ConfigProperty configProperty : mBlockWorker - .getConfiguration(GetConfigurationPOptions.newBuilder().setRawValue(true).build()) - .toProto().getClusterConfigsList()) { + Set<String> alluxioConfExcludes = Sets.newHashSet(PropertyKey.MASTER_WHITELIST.toString()); + for (ConfigProperty configProperty : conf.toProto().getClusterConfigsList()) { String confName = configProperty.getName(); if (!alluxioConfExcludes.contains(confName)) { sortedProperties.add(new ImmutableTriple<>(confName, - ConfigurationUtils.valueAsString(configProperty.getValue()), - configProperty.getSource())); + ConfigurationUtils.valueAsString(configProperty.getValue()), + configProperty.getSource())); } } response.setConfiguration(sortedProperties); - + response.setClusterConfigHash(conf.getClusterConfHash()); + response.setPathConfigHash(conf.getPathConfHash()); + response.setClusterConfigLastUpdateTime( + CommonUtils.convertMsToDate(conf.getClusterConfLastUpdateTime(), + alluxio.conf.Configuration.getString(PropertyKey.USER_DATE_FORMAT_PATTERN))); + response.setPathConfigLastUpdateTime( + CommonUtils.convertMsToDate(conf.getPathConfLastUpdateTime(), + alluxio.conf.Configuration.getString(PropertyKey.USER_DATE_FORMAT_PATTERN))); return response; }, Configuration.global()); } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/AbstractBlockStoreEventListener.java b/core/server/worker/src/main/java/alluxio/worker/block/AbstractBlockStoreEventListener.java index dc99e9406b7d..b35475bc06c7 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/AbstractBlockStoreEventListener.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/AbstractBlockStoreEventListener.java @@ -30,7 +30,10 @@ public void onAccessBlock(long blockId, BlockStoreLocation location) {} public void onAbortBlock(long blockId) {}
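The hunk that follows splits onCommitBlock into local- and master-commit callbacks. A hedged sketch of a listener using that split (the callback names come from the hunk; the counting behavior and the Location stand-in are invented for illustration):

public final class CommitListenerSketch {
  // Stand-in for the real BlockStoreLocation type in this sketch.
  interface Location {}

  static class CountingListener {
    long mLocalCommits;
    long mMasterCommits;

    // Fired when the block lands in local storage.
    public void onCommitBlockToLocal(long blockId, Location location) {
      mLocalCommits++;
    }

    // Fired when the master has acknowledged the block location.
    public void onCommitBlockToMaster(long blockId, Location location) {
      mMasterCommits++;
    }
  }

  public static void main(String[] args) {
    CountingListener listener = new CountingListener();
    listener.onCommitBlockToLocal(1L, null);
    listener.onCommitBlockToMaster(1L, null);
    System.out.println(listener.mLocalCommits + "/" + listener.mMasterCommits); // 1/1
  }
}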
@Override - public void onCommitBlock(long blockId, BlockStoreLocation location) {} + public void onCommitBlockToLocal(long blockId, BlockStoreLocation location) {} + + @Override + public void onCommitBlockToMaster(long blockId, BlockStoreLocation location) {} @Override public void onMoveBlockByClient(long blockId, BlockStoreLocation oldLocation, diff --git a/core/server/worker/src/main/java/alluxio/worker/block/AllMasterRegistrationBlockWorker.java b/core/server/worker/src/main/java/alluxio/worker/block/AllMasterRegistrationBlockWorker.java new file mode 100644 index 000000000000..64acbb337e08 --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/AllMasterRegistrationBlockWorker.java @@ -0,0 +1,78 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +import alluxio.Sessions; +import alluxio.wire.WorkerNetAddress; +import alluxio.worker.file.FileSystemMasterClient; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * This class is responsible for managing all top-level components of the BlockWorker. + * + * This block worker implementation registers the worker with all masters. + */ +@NotThreadSafe +public class AllMasterRegistrationBlockWorker extends DefaultBlockWorker { + private static final Logger LOG = LoggerFactory.getLogger(AllMasterRegistrationBlockWorker.class); + private BlockSyncMasterGroup mBlockSyncMasterGroup; + + /** + * Constructs a block worker when workers register to all masters. + * + * @param blockMasterClientPool a client pool for talking to the block master + * @param fileSystemMasterClient a client for talking to the file system master + * @param sessions an object for tracking and cleaning up client sessions + * @param blockStore an Alluxio block store + * @param workerId worker id + */ + public AllMasterRegistrationBlockWorker( + BlockMasterClientPool blockMasterClientPool, + FileSystemMasterClient fileSystemMasterClient, Sessions sessions, + BlockStore blockStore, AtomicReference<Long> workerId) { + super(blockMasterClientPool, fileSystemMasterClient, sessions, blockStore, workerId); + } + + @Override + protected void setupBlockMasterSync() { + mBlockSyncMasterGroup = + BlockSyncMasterGroup.Factory.createAllMasterSync(this); + mResourceCloser.register(mBlockSyncMasterGroup); + mBlockSyncMasterGroup.start(getExecutorService()); + } + + @Override + public void start(WorkerNetAddress address) throws IOException { + super.start(address); + + InetSocketAddress primaryMasterAddress = + (InetSocketAddress) mFileSystemMasterClient.getRemoteSockAddress(); + // Registrations on standby masters are not required to complete for starting a worker + // because standby masters do not serve read requests. + // Standby masters will catch up on block location changes via worker heartbeats.
+ mBlockSyncMasterGroup.waitForPrimaryMasterRegistrationComplete(primaryMasterAddress); + } + + /** + * @return the block sync master group + */ + public BlockSyncMasterGroup getBlockSyncMasterGroup() { + return mBlockSyncMasterGroup; + } +} diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockHeartbeatReporter.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockHeartbeatReporter.java index 5c0c7b52923c..fd0a1e24ece2 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockHeartbeatReporter.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockHeartbeatReporter.java @@ -11,23 +11,32 @@ package alluxio.worker.block; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; + import com.google.common.collect.Lists; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import javax.annotation.concurrent.ThreadSafe; /** * Represents the delta of the block store within one heartbeat period. For now, newly committed * blocks do not pass through this master communication mechanism, instead it is synchronized - * through {@link alluxio.worker.block.BlockWorker#commitBlock(long, long)}. + * through {@link alluxio.worker.block.BlockWorker#commitBlock(long, long, boolean)}. */ @ThreadSafe -public final class BlockHeartbeatReporter extends AbstractBlockStoreEventListener { +public class BlockHeartbeatReporter extends AbstractBlockStoreEventListener { + private static final Logger LOG = LoggerFactory.getLogger(BlockHeartbeatReporter.class); + /** Lock for operations on the removed and added block collections. */ private final Object mLock; @@ -46,6 +55,9 @@ public final class BlockHeartbeatReporter extends AbstractBlockStoreEventListene */ private final Map<String, List<String>> mLostStorage; + private final boolean mWorkerRegisterToAllMasters = + Configuration.getBoolean(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS); + /** * Creates a new instance of {@link BlockHeartbeatReporter}. */ @@ -54,15 +66,15 @@ public BlockHeartbeatReporter() { mRemovedBlocks = new ArrayList<>(100); mAddedBlocks = new HashMap<>(20); mLostStorage = new HashMap<>(); + LOG.debug("BlockHeartbeatReporter initialized"); } /** - * Generates the report of the block store delta in the last heartbeat period. Calling this method - * marks the end of a period and the start of a new heartbeat period. + * Generates the report of the block store delta in the last heartbeat period and clears the + * tracked state. * * @return the block store delta report for the last heartbeat period */ - public BlockHeartbeatReport generateReport() { + public BlockHeartbeatReport generateReportAndClear() { synchronized (mLock) { BlockHeartbeatReport report = new BlockHeartbeatReport(mAddedBlocks, mRemovedBlocks, mLostStorage); @@ -74,6 +86,73 @@ } } + /** + * Clears the internal states of the reporter. + */ + public void clear() { + synchronized (mLock) { + mAddedBlocks.clear(); + mRemovedBlocks.clear(); + mLostStorage.clear(); + } + }
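Before the mergeBack method that follows, a simplified worked example of its merge rule: blocks added in the failed report are re-staged unless a removal has been recorded since (the real code keys added blocks by BlockStoreLocation; this sketch flattens that detail away):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public final class MergeBackSketch {
  public static void main(String[] args) {
    // Blocks reported as added in the heartbeat that failed.
    List<Long> previouslyAdded = Arrays.asList(1L, 2L, 3L);
    // Removals recorded after that report was generated.
    Set<Long> removedSince = new HashSet<>(Arrays.asList(2L));
    List<Long> toRestage = new ArrayList<>();
    for (long blockId : previouslyAdded) {
      if (!removedSince.contains(blockId)) {
        toRestage.add(blockId);
      }
    }
    System.out.println(toRestage); // [1, 3]: block 2 was removed in the meantime
  }
}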
+ + /** + * Merges back the cleared block lists/maps given a generated report. + * Used when the worker heartbeat RPC fails. + * + * @param previousReport the previous generated report + */ + public void mergeBack(BlockHeartbeatReport previousReport) { + synchronized (mLock) { + Set<Long> removedBlocksSet = new HashSet<>(mRemovedBlocks); + for (Entry<BlockStoreLocation, List<Long>> addedBlockEntry: + previousReport.getAddedBlocks().entrySet()) { + List<Long> blockIds = addedBlockEntry.getValue(); + // Two pass scans to avoid creating too many ephemeral objects + // given that adding a block then removing it is unlikely. + boolean needToRemoveBlock = false; + for (long blockId: blockIds) { + if (removedBlocksSet.contains(blockId)) { + needToRemoveBlock = true; + break; + } + } + final List<Long> blockIdsToAdd; + if (!needToRemoveBlock) { + blockIdsToAdd = blockIds; + } else { + blockIdsToAdd = new ArrayList<>(); + for (long blockId: blockIds) { + if (!removedBlocksSet.contains(blockId)) { + blockIdsToAdd.add(blockId); + } + } + } + if (blockIdsToAdd.size() == 0) { + continue; + } + if (mAddedBlocks.containsKey(addedBlockEntry.getKey())) { + mAddedBlocks.get(addedBlockEntry.getKey()).addAll(blockIdsToAdd); + } else { + mAddedBlocks.put(addedBlockEntry.getKey(), blockIdsToAdd); + } + } + for (Map.Entry<String, List<String>> lostStorageEntry: + previousReport.getLostStorage().entrySet()) { + if (lostStorageEntry.getValue().size() == 0) { + continue; + } + if (mLostStorage.containsKey(lostStorageEntry.getKey())) { + mLostStorage.get(lostStorageEntry.getKey()).addAll(lostStorageEntry.getValue()); + } else { + mLostStorage.put(lostStorageEntry.getKey(), lostStorageEntry.getValue()); + } + } + mRemovedBlocks.addAll(previousReport.getRemovedBlocks()); + } + } + @Override public void onMoveBlockByClient(long blockId, BlockStoreLocation oldLocation, BlockStoreLocation newLocation) { diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java index ced8f6efe6db..657051b3f4c0 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java @@ -33,11 +33,13 @@ import alluxio.grpc.GrpcUtils; import alluxio.grpc.LocationBlockIdListEntry; import alluxio.grpc.Metric; +import alluxio.grpc.NotifyWorkerIdPRequest; import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.RegisterWorkerPRequest; import alluxio.grpc.ServiceType; import alluxio.grpc.StorageList; import alluxio.master.MasterClientContext; +import alluxio.master.selectionpolicy.MasterSelectionPolicy; import alluxio.retry.RetryPolicy; import alluxio.wire.WorkerNetAddress; @@ -46,6 +48,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -74,6 +77,17 @@ public BlockMasterClient(MasterClientContext conf) { super(conf); } + /** + * Creates a new instance of {@link BlockMasterClient} for the worker and + * connects to a specific master.
diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java index ced8f6efe6db..657051b3f4c0 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java @@ -33,11 +33,13 @@ import alluxio.grpc.GrpcUtils; import alluxio.grpc.LocationBlockIdListEntry; import alluxio.grpc.Metric; +import alluxio.grpc.NotifyWorkerIdPRequest; import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.RegisterWorkerPRequest; import alluxio.grpc.ServiceType; import alluxio.grpc.StorageList; import alluxio.master.MasterClientContext; +import alluxio.master.selectionpolicy.MasterSelectionPolicy; import alluxio.retry.RetryPolicy; import alluxio.wire.WorkerNetAddress; @@ -46,6 +48,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -74,6 +77,17 @@ public BlockMasterClient(MasterClientContext conf) { super(conf); } + /** + * Creates a new instance of {@link BlockMasterClient} for the worker and + * connects to a specific master. + * + * @param conf master client configuration + * @param address the master address + */ + public BlockMasterClient(MasterClientContext conf, InetSocketAddress address) { + super(conf, MasterSelectionPolicy.Factory.specifiedMaster(address)); + } + @Override protected ServiceType getRemoteServiceType() { return ServiceType.BLOCK_MASTER_WORKER_SERVICE; @@ -296,7 +310,9 @@ public void register(final long workerId, final List<String> storageTierAliases, final RegisterWorkerPOptions options = RegisterWorkerPOptions.newBuilder().addAllConfigs(configList) - .setBuildVersion(buildVersion).build(); + .setBuildVersion(buildVersion) + .setNumVCpu(Runtime.getRuntime().availableProcessors()) + .build(); final List<LocationBlockIdListEntry> currentBlocks = convertBlockListMapToProto(currentBlocksOnLocation); @@ -310,7 +326,8 @@ .putAllUsedBytesOnTiers(usedBytesOnTiers) .addAllCurrentBlocks(currentBlocks) .putAllLostStorage(lostStorageMap) - .setOptions(options).build(); + .setOptions(options) + .build(); retryRPC(() -> { mClient.registerWorker(request); @@ -359,4 +376,20 @@ public void registerWithStream(final long workerId, final List<String> storageTi throw ioe.get(); } } + + /** + * Notify all masters about the worker ID. + * @param workerId the worker id + * @param address the worker address + */ + public void notifyWorkerId(long workerId, WorkerNetAddress address) throws IOException { + retryRPC(() -> { + LOG.info("Notifying workerID to master {} with workerId {}, workerAddress {}", + mServerAddress, + workerId, + address); + return mClient.notifyWorkerId(NotifyWorkerIdPRequest.newBuilder() + .setWorkerId(workerId).setWorkerNetAddress(GrpcUtils.toProto(address)).build()); + }, LOG, "NotifyWorkerId", "workerId=%d, workerAddress=%s", workerId, address); + } }
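A sketch of how the new constructor and RPC compose (hypothetical master address and worker values; per the diff, `MasterSelectionPolicy.Factory.specifiedMaster` pins the connection to one master, primary or standby):

```java
InetSocketAddress master = new InetSocketAddress("master-2", 19998); // hypothetical
MasterClientContext ctx = MasterClientContext
    .newBuilder(ClientContext.create(Configuration.global())).build();
BlockMasterClient client = new BlockMasterClient(ctx, master);
// Tell this specific master which worker ID this worker holds, since the
// ID may have been issued by a different (primary) master.
client.notifyWorkerId(workerId, workerNetAddress); // hypothetical values
```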
diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClientPool.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClientPool.java index e9ece84e2fc9..3c14b07be555 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClientPool.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClientPool.java @@ -17,11 +17,14 @@ import alluxio.master.MasterClientContext; import alluxio.resource.ResourcePool; +import com.google.common.annotations.VisibleForTesting; import com.google.common.io.Closer; import java.io.IOException; +import java.net.InetSocketAddress; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; +import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; /** @@ -34,14 +37,41 @@ public class BlockMasterClientPool extends ResourcePool<BlockMasterClient> { private final Queue<BlockMasterClient> mClientList; private final MasterClientContext mMasterContext; + /** If not specified, the client pool will create clients connecting to the primary master. **/ + @Nullable + private final InetSocketAddress mMasterAddress; + + /** + * A factory class for testing purpose. + */ + @VisibleForTesting + static class Factory { + BlockMasterClientPool create() { + return new BlockMasterClientPool(); + } + + BlockMasterClientPool create(@Nullable InetSocketAddress address) { + return new BlockMasterClientPool(address); + } + } + /** * Creates a new block master client pool. */ public BlockMasterClientPool() { + this(null); + } + + /** + * Creates a new block master client pool. + * @param address the block master address + */ + public BlockMasterClientPool(@Nullable InetSocketAddress address) { super(Configuration.getInt(PropertyKey.WORKER_BLOCK_MASTER_CLIENT_POOL_SIZE)); mClientList = new ConcurrentLinkedQueue<>(); mMasterContext = MasterClientContext .newBuilder(ClientContext.create(Configuration.global())).build(); + mMasterAddress = address; } @Override @@ -56,7 +86,14 @@ public void close() throws IOException { @Override public BlockMasterClient createNewResource() { - BlockMasterClient client = new BlockMasterClient(mMasterContext); + final BlockMasterClient client; + if (mMasterAddress != null) { + // If an address is specified, all clients in this pool connect to that + // specific master, no matter whether it is a primary or a standby + client = new BlockMasterClient(mMasterContext, mMasterAddress); + } else { + client = new BlockMasterClient(mMasterContext); + } mClientList.add(client); return client; }
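Usage sketch for the address-aware pool (hypothetical address; passing null keeps the old primary-only behavior):

```java
// Every client created by this pool talks to the given master only.
BlockMasterClientPool pool =
    new BlockMasterClientPool(new InetSocketAddress("master-2", 19998)); // hypothetical
BlockMasterClient client = pool.acquire();
try {
  // ... use the client against that one master ...
} finally {
  pool.release(client);
}
```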
diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSync.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSync.java index da898ccd9405..e9572f7b448f 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSync.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSync.java @@ -11,27 +11,21 @@ package alluxio.worker.block; +import alluxio.Constants; import alluxio.ProcessUtils; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.ConnectionFailedException; import alluxio.exception.FailedToAcquireRegisterLeaseException; import alluxio.grpc.Command; -import alluxio.grpc.ConfigProperty; -import alluxio.grpc.Scope; import alluxio.heartbeat.HeartbeatExecutor; -import alluxio.metrics.MetricsSystem; -import alluxio.retry.ExponentialTimeBoundedRetry; -import alluxio.retry.RetryPolicy; +import alluxio.util.logging.SamplingLogger; import alluxio.wire.WorkerNetAddress; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.time.Duration; -import java.time.temporal.ChronoUnit; -import java.util.List; import java.util.concurrent.atomic.AtomicReference; import javax.annotation.concurrent.NotThreadSafe; @@ -51,10 +45,7 @@ @NotThreadSafe public final class BlockMasterSync implements HeartbeatExecutor { private static final Logger LOG = LoggerFactory.getLogger(BlockMasterSync.class); - private static final long ACQUIRE_LEASE_WAIT_BASE_SLEEP_MS = - Configuration.getMs(PropertyKey.WORKER_REGISTER_LEASE_RETRY_SLEEP_MIN); - private static final long ACQUIRE_LEASE_WAIT_MAX_SLEEP_MS = - Configuration.getMs(PropertyKey.WORKER_REGISTER_LEASE_RETRY_SLEEP_MAX); + private static final Logger SAMPLING_LOG = new SamplingLogger(LOG, 30L * Constants.SECOND); private static final long ACQUIRE_LEASE_WAIT_MAX_DURATION = Configuration.getMs(PropertyKey.WORKER_REGISTER_LEASE_RETRY_MAX_DURATION); private static final int HEARTBEAT_TIMEOUT_MS = @@ -80,6 +71,9 @@ public final class BlockMasterSync implements HeartbeatExecutor { /** Last System.currentTimeMillis() timestamp when a heartbeat successfully completed. */ private long mLastSuccessfulHeartbeatMs; + /** The helper instance for sync related methods. */ + private final BlockMasterSyncHelper mBlockMasterSyncHelper; + /** * Creates a new instance of {@link BlockMasterSync}. * @@ -96,99 +90,43 @@ public BlockMasterSync(BlockWorker blockWorker, AtomicReference<Long> workerId, mMasterClientPool = masterClientPool; mMasterClient = mMasterClientPool.acquire(); mAsyncBlockRemover = new AsyncBlockRemover(mBlockWorker); + mBlockMasterSyncHelper = new BlockMasterSyncHelper(mMasterClient); registerWithMaster(); mLastSuccessfulHeartbeatMs = System.currentTimeMillis(); } - /** - * Gets the default retry policy for acquiring a {@link alluxio.wire.RegisterLease} - * from the BlockMaster. - * - * @return the policy to use - */ - public static RetryPolicy getDefaultAcquireLeaseRetryPolicy() { - return ExponentialTimeBoundedRetry.builder() - .withMaxDuration(Duration.of(ACQUIRE_LEASE_WAIT_MAX_DURATION, ChronoUnit.MILLIS)) - .withInitialSleep(Duration.of(ACQUIRE_LEASE_WAIT_BASE_SLEEP_MS, ChronoUnit.MILLIS)) - .withMaxSleep(Duration.of(ACQUIRE_LEASE_WAIT_MAX_SLEEP_MS, ChronoUnit.MILLIS)) - .withSkipInitialSleep() - .build(); - } - /** * Registers with the Alluxio master. This should be called before the * continuous heartbeat thread begins. */ private void registerWithMaster() throws IOException { BlockStoreMeta storeMeta = mBlockWorker.getStoreMetaFull(); - List<ConfigProperty> configList = - Configuration.getConfiguration(Scope.WORKER); - - boolean leaseRequired = Configuration.getBoolean(PropertyKey.WORKER_REGISTER_LEASE_ENABLED); - if (leaseRequired) { - LOG.info("Acquiring a RegisterLease from the master before registering"); - try { - mMasterClient.acquireRegisterLeaseWithBackoff(mWorkerId.get(), - storeMeta.getNumberOfBlocks(), - getDefaultAcquireLeaseRetryPolicy()); - LOG.info("Lease acquired"); - } catch (FailedToAcquireRegisterLeaseException e) { - mMasterClient.disconnect(); - if (Configuration.getBoolean(PropertyKey.TEST_MODE)) { - throw new RuntimeException(String.format("Master register lease timeout exceeded: %dms", - ACQUIRE_LEASE_WAIT_MAX_DURATION)); - } - ProcessUtils.fatalError(LOG, "Master register lease timeout exceeded: %dms", - ACQUIRE_LEASE_WAIT_MAX_DURATION); + try { + mBlockMasterSyncHelper.tryAcquireLease(mWorkerId.get(), storeMeta); + } catch (FailedToAcquireRegisterLeaseException e) { + mMasterClient.disconnect(); + if (Configuration.getBoolean(PropertyKey.TEST_MODE)) { + throw new RuntimeException(String.format("Master register lease timeout exceeded: %dms", + ACQUIRE_LEASE_WAIT_MAX_DURATION)); } + ProcessUtils.fatalError(LOG, "Master register lease timeout exceeded: %dms", + ACQUIRE_LEASE_WAIT_MAX_DURATION); } - - boolean useStreaming = Configuration.getBoolean(PropertyKey.WORKER_REGISTER_STREAM_ENABLED); - if (useStreaming) { - mMasterClient.registerWithStream(mWorkerId.get(), - storeMeta.getStorageTierAssoc().getOrderedStorageAliases(), - storeMeta.getCapacityBytesOnTiers(), - storeMeta.getUsedBytesOnTiers(), storeMeta.getBlockListByStorageLocation(), - storeMeta.getLostStorage(), configList); - } else { - mMasterClient.register(mWorkerId.get(), - storeMeta.getStorageTierAssoc().getOrderedStorageAliases(), - storeMeta.getCapacityBytesOnTiers(), - storeMeta.getUsedBytesOnTiers(), storeMeta.getBlockListByStorageLocation(), - storeMeta.getLostStorage(), configList); - } - // If the worker registers with master successfully, the lease will be recycled on the - // master side. No need to manually request for recycle on the worker side. + mBlockMasterSyncHelper.registerToMaster(mWorkerId.get(), storeMeta); }
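The branches removed above are now driven by the same configuration keys inside BlockMasterSyncHelper. For reference, a hedged snippet of the knobs involved (property names assumed from the PropertyKey constants used here; values are illustrative only):

```properties
alluxio.worker.register.lease.enabled=true
alluxio.worker.register.lease.retry.max.duration=10min
alluxio.worker.register.stream.enabled=true
alluxio.worker.register.to.all.masters=false
```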
/** * Heartbeats to the master node about the change in the worker's managed space. */ @Override - public void heartbeat() { - // Prepare metadata for the next heartbeat - BlockHeartbeatReport blockReport = mBlockWorker.getReport(); - BlockStoreMeta storeMeta = mBlockWorker.getStoreMeta(); - - // Send the heartbeat and execute the response - Command cmdFromMaster = null; - List<Metric> metrics = MetricsSystem.reportWorkerMetrics(); - - try { - cmdFromMaster = mMasterClient.heartbeat(mWorkerId.get(), storeMeta.getCapacityBytesOnTiers(), - storeMeta.getUsedBytesOnTiers(), blockReport.getRemovedBlocks(), - blockReport.getAddedBlocks(), blockReport.getLostStorage(), metrics); - handleMasterCommand(cmdFromMaster); + public void heartbeat(long timeLimitMs) { + boolean success = mBlockMasterSyncHelper.heartbeat( + mWorkerId.get(), mBlockWorker.getReport(), + mBlockWorker.getStoreMeta(), this::handleMasterCommand); + if (success) { mLastSuccessfulHeartbeatMs = System.currentTimeMillis(); - } catch (IOException | ConnectionFailedException e) { - // An error occurred, log and ignore it or error if heartbeat timeout is reached - if (cmdFromMaster == null) { - LOG.error("Failed to receive master heartbeat command.", e); - } else { - LOG.error("Failed to receive or execute master heartbeat command: {}", cmdFromMaster, e); - } - mMasterClient.disconnect(); + } else { if (HEARTBEAT_TIMEOUT_MS > 0) { if (System.currentTimeMillis() - mLastSuccessfulHeartbeatMs >= HEARTBEAT_TIMEOUT_MS) { if (Configuration.getBoolean(PropertyKey.TEST_MODE)) { @@ -242,6 +180,9 @@ private void handleMasterCommand(Command cmd) throws IOException, ConnectionFail case Unknown: LOG.error("Master heartbeat sends unknown command {}", cmd); break; + case Decommissioned: + SAMPLING_LOG.info("This worker has been decommissioned"); + break; default: throw new RuntimeException("Un-recognized command from master " + cmd); } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSyncHelper.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSyncHelper.java new file mode 100644 index 000000000000..20dc2ded5550 --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSyncHelper.java @@ -0,0 +1,156 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.ConnectionFailedException; +import alluxio.exception.FailedToAcquireRegisterLeaseException; +import alluxio.grpc.Command; +import alluxio.grpc.ConfigProperty; +import alluxio.grpc.Scope; +import alluxio.metrics.MetricsSystem; +import alluxio.retry.ExponentialTimeBoundedRetry; +import alluxio.retry.RetryPolicy; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.time.Duration; +import java.time.temporal.ChronoUnit; +import java.util.List; + +/** + * The helper class for block master sync related methods.
+ */ +public class BlockMasterSyncHelper { + private static final Logger LOG = LoggerFactory.getLogger(BlockMasterSyncHelper.class); + + private static final long ACQUIRE_LEASE_WAIT_BASE_SLEEP_MS = + Configuration.getMs(PropertyKey.WORKER_REGISTER_LEASE_RETRY_SLEEP_MIN); + private static final long ACQUIRE_LEASE_WAIT_MAX_SLEEP_MS = + Configuration.getMs(PropertyKey.WORKER_REGISTER_LEASE_RETRY_SLEEP_MAX); + private static final long ACQUIRE_LEASE_WAIT_MAX_DURATION = + Configuration.getMs(PropertyKey.WORKER_REGISTER_LEASE_RETRY_MAX_DURATION); + + private final BlockMasterClient mMasterClient; + + /** + * Constructs an instance of the helper class. + * @param masterClient the block master client + */ + public BlockMasterSyncHelper(BlockMasterClient masterClient) { + mMasterClient = masterClient; + } + + @FunctionalInterface + interface MasterCommandHandler { + void handle(Command command) throws ConnectionFailedException, IOException; + } + + /** + * Gets the default retry policy for acquiring a {@link alluxio.wire.RegisterLease} + * from the BlockMaster. + * + * @return the policy to use + */ + public static RetryPolicy getDefaultAcquireLeaseRetryPolicy() { + return ExponentialTimeBoundedRetry.builder() + .withMaxDuration(Duration.of(ACQUIRE_LEASE_WAIT_MAX_DURATION, ChronoUnit.MILLIS)) + .withInitialSleep(Duration.of(ACQUIRE_LEASE_WAIT_BASE_SLEEP_MS, ChronoUnit.MILLIS)) + .withMaxSleep(Duration.of(ACQUIRE_LEASE_WAIT_MAX_SLEEP_MS, ChronoUnit.MILLIS)) + .withSkipInitialSleep() + .build(); + } + + /** + * Acquires a lease from the master before registration. + * @param workerId the worker id + * @param storeMeta the store meta + */ + void tryAcquireLease( + long workerId, BlockStoreMeta storeMeta) + throws IOException, FailedToAcquireRegisterLeaseException { + boolean leaseRequired = Configuration.getBoolean(PropertyKey.WORKER_REGISTER_LEASE_ENABLED); + if (leaseRequired) { + LOG.info("Acquiring a RegisterLease from the master before registering"); + mMasterClient.acquireRegisterLeaseWithBackoff(workerId, + storeMeta.getNumberOfBlocks(), + getDefaultAcquireLeaseRetryPolicy()); + LOG.info("Lease acquired"); + } + // If the worker registers with master successfully, the lease will be recycled on the + // master side. No need to manually request for recycle on the worker side. + } + + /** + * Registers the worker to the master. + * @param workerId the worker id + * @param fullStoreMeta the full store meta contains the block id list + */ + void registerToMaster( + long workerId, BlockStoreMeta fullStoreMeta) throws IOException { + List<ConfigProperty> configList = + Configuration.getConfiguration(Scope.WORKER); + + boolean useStreaming = Configuration.getBoolean(PropertyKey.WORKER_REGISTER_STREAM_ENABLED); + if (useStreaming) { + mMasterClient.registerWithStream(workerId, + fullStoreMeta.getStorageTierAssoc().getOrderedStorageAliases(), + fullStoreMeta.getCapacityBytesOnTiers(), + fullStoreMeta.getUsedBytesOnTiers(), fullStoreMeta.getBlockListByStorageLocation(), + fullStoreMeta.getLostStorage(), configList); + } else { + mMasterClient.register(workerId, + fullStoreMeta.getStorageTierAssoc().getOrderedStorageAliases(), + fullStoreMeta.getCapacityBytesOnTiers(), + fullStoreMeta.getUsedBytesOnTiers(), fullStoreMeta.getBlockListByStorageLocation(), + fullStoreMeta.getLostStorage(), configList); + } + } + + /** + * Heartbeats to the master and handles the master heartbeat command. + * Errors are handled in the method.
+ * @param workerId the worker id + * @param blockReport the block report + * @param storeMeta the store meta + * @param handler the command handler + * @return true if the heartbeat succeeded + */ + boolean heartbeat( + long workerId, BlockHeartbeatReport blockReport, BlockStoreMeta storeMeta, + MasterCommandHandler handler + ) { + // Send the heartbeat and execute the response + Command cmdFromMaster = null; + List<Metric> metrics = MetricsSystem.reportWorkerMetrics(); + try { + cmdFromMaster = mMasterClient.heartbeat(workerId, storeMeta.getCapacityBytesOnTiers(), + storeMeta.getUsedBytesOnTiers(), blockReport.getRemovedBlocks(), + blockReport.getAddedBlocks(), blockReport.getLostStorage(), metrics); + handler.handle(cmdFromMaster); + return true; + } catch (Exception e) { + // An error occurred, log and ignore it or error if heartbeat timeout is reached + if (cmdFromMaster == null) { + LOG.error("Failed to receive master heartbeat command. worker id {}", workerId, e); + } else { + LOG.error("Failed to receive or execute master heartbeat command: {}. worker id {}", + cmdFromMaster, workerId, e); + } + mMasterClient.disconnect(); + return false; + } + } +}
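Since MasterCommandHandler is a functional interface, the sync implementations pass their command logic as a method reference or lambda; a minimal sketch (hypothetical caller):

```java
BlockMasterSyncHelper helper = new BlockMasterSyncHelper(masterClient);
boolean ok = helper.heartbeat(workerId, report, storeMeta, command -> {
  // e.g. re-register when the master no longer knows this worker, or free
  // blocks when asked; see handleMasterCommand in the sync classes.
});
```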
diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java new file mode 100644 index 000000000000..ba9758da143a --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java @@ -0,0 +1,183 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +import alluxio.ClientContext; +import alluxio.ProcessUtils; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.heartbeat.FixedIntervalSupplier; +import alluxio.heartbeat.HeartbeatContext; +import alluxio.heartbeat.HeartbeatThread; +import alluxio.master.MasterClientContext; +import alluxio.security.user.ServerUserState; +import alluxio.util.CommonUtils; +import alluxio.util.ConfigurationUtils; +import alluxio.util.WaitForOptions; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeoutException; + +/** + * An abstraction layer that manages the worker heartbeats with multiple block masters. + * This is only active when worker.register.to.all.masters=true. + */ +public class BlockSyncMasterGroup implements Closeable { + private static final Logger LOG = LoggerFactory.getLogger(BlockSyncMasterGroup.class); + private volatile boolean mStarted = false; + + private final boolean mTestMode = Configuration.getBoolean(PropertyKey.TEST_MODE); + + private static BlockMasterClientFactory sBlockMasterClientFactory + = new BlockMasterClientFactory(); + + private static final long WORKER_MASTER_CONNECT_RETRY_TIMEOUT = + Configuration.getMs(PropertyKey.WORKER_MASTER_CONNECT_RETRY_TIMEOUT); + + /** + * Creates a block sync master group. + * @param masterAddresses the master addresses to sync + * @param blockWorker the block worker instance + */ + public BlockSyncMasterGroup( + List<InetSocketAddress> masterAddresses, + BlockWorker blockWorker + ) throws IOException { + // TODO(elega): handle master membership changes + // https://github.com/Alluxio/alluxio/issues/16898 + for (InetSocketAddress masterAddr : masterAddresses) { + BlockMasterClient masterClient = sBlockMasterClientFactory.create(masterAddr); + BlockHeartbeatReporter heartbeatReporter = new BlockHeartbeatReporter(); + + blockWorker.getBlockStore().registerBlockStoreEventListener(heartbeatReporter); + // Setup BlockMasterSync + SpecificMasterBlockSync blockMasterSync = mTestMode + ? new TestSpecificMasterBlockSync( + blockWorker, masterClient, heartbeatReporter) + : new SpecificMasterBlockSync( + blockWorker, masterClient, heartbeatReporter); + // Register each BlockMasterSync to the block events on this worker + mMasterSyncOperators.put(masterAddr, blockMasterSync); + LOG.info("Kick off BlockMasterSync with master {}", masterAddr); + } + } + + /** + * Starts the heartbeats. + * @param executorService the executor service to run the heartbeats + */ + public synchronized void start(ExecutorService executorService) { + if (!mStarted) { + mStarted = true; + } + mMasterSyncOperators.values().forEach(blockMasterSync -> executorService + .submit(new HeartbeatThread(HeartbeatContext.WORKER_BLOCK_SYNC, blockMasterSync, + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS)), + Configuration.global(), ServerUserState.global()))); + } + + private final Map<InetSocketAddress, SpecificMasterBlockSync> mMasterSyncOperators = + new HashMap<>(); + + @Override + public void close() throws IOException { + mMasterSyncOperators.values().forEach( + SpecificMasterBlockSync::close + ); + } + + static void setBlockMasterClientFactory(BlockMasterClientFactory factory) { + sBlockMasterClientFactory = factory; + } + + /** + * Waits until the primary master registration completes.
+ * @param primaryMasterAddress the primary master address + */ + public void waitForPrimaryMasterRegistrationComplete(InetSocketAddress primaryMasterAddress) { + SpecificMasterBlockSync primaryMasterSync = + mMasterSyncOperators.get(primaryMasterAddress); + Preconditions.checkNotNull( + primaryMasterSync, "Primary master block sync should not be null"); + try { + CommonUtils.waitFor(this + " to start", + primaryMasterSync::isRegistered, + WaitForOptions.defaults().setTimeoutMs(WORKER_MASTER_CONNECT_RETRY_TIMEOUT)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.warn("Exit the worker on interruption", e); + throw new RuntimeException(e); + } catch (TimeoutException e) { + ProcessUtils.fatalError(LOG, e, "Failed to register with primary master"); + } + LOG.info("The worker has registered with primary master, address {}", primaryMasterAddress); + } + + /** + * @return if the worker is registered to all masters + */ + public boolean isRegisteredToAllMasters() { + return mMasterSyncOperators.values().stream().allMatch(SpecificMasterBlockSync::isRegistered); + } + + /** + * @return the master sync operators + */ + public Map<InetSocketAddress, SpecificMasterBlockSync> getMasterSyncOperators() { + return mMasterSyncOperators; + } + + /** + * The factory class. + */ + public static class Factory { + /** + * Creates a block sync master group that heartbeats to all masters. + * @param blockWorker the block worker instance + * @return the block sync master group instance + */ + public static BlockSyncMasterGroup createAllMasterSync(BlockWorker blockWorker) { + List<InetSocketAddress> masterAddresses = + ConfigurationUtils.getMasterRpcAddresses(Configuration.global()); + try { + return new BlockSyncMasterGroup(masterAddresses, blockWorker); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + /** + * A factory class for testing purpose. + */ + @VisibleForTesting + static class BlockMasterClientFactory { + BlockMasterClient create(InetSocketAddress address) { + MasterClientContext context = MasterClientContext + .newBuilder(ClientContext.create(Configuration.global())).build(); + + return new BlockMasterClient(context, address); + } + } +}
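How the pieces are wired at worker start when registering to all masters — a sketch based on the methods above and on AllMasterRegistrationBlockWorker's call to waitForPrimaryMasterRegistrationComplete earlier in this diff:

```java
BlockSyncMasterGroup group =
    BlockSyncMasterGroup.Factory.createAllMasterSync(blockWorker);
group.start(executorService); // one heartbeat thread per master
// Block only on the primary; standby registrations keep retrying
// asynchronously in the background.
group.waitForPrimaryMasterRegistrationComplete(primaryMasterAddress);
```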
diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockWorkerFactory.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockWorkerFactory.java index 19a0e996f07a..a2f96428ea92 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockWorkerFactory.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockWorkerFactory.java @@ -34,6 +34,8 @@ @ThreadSafe public final class BlockWorkerFactory implements WorkerFactory { private static final Logger LOG = LoggerFactory.getLogger(BlockWorkerFactory.class); + private final boolean mWorkerRegisterToAllMasters = Configuration.getBoolean( + PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS); /** * Constructs a new {@link BlockWorkerFactory}. @@ -64,10 +66,17 @@ public BlockWorker create(WorkerRegistry registry, UfsManager ufsManager) { default: throw new UnsupportedOperationException("Unsupported block store type."); } - BlockWorker blockWorker = new DefaultBlockWorker(blockMasterClientPool, - new FileSystemMasterClient( - MasterClientContext.newBuilder(ClientContext.create(Configuration.global())).build()), - new Sessions(), blockStore, workerId); + BlockWorker blockWorker = mWorkerRegisterToAllMasters + ? new AllMasterRegistrationBlockWorker(blockMasterClientPool, + new FileSystemMasterClient( + MasterClientContext.newBuilder(ClientContext.create(Configuration.global())) + .build()), + new Sessions(), blockStore, workerId) + : new DefaultBlockWorker(blockMasterClientPool, + new FileSystemMasterClient( + MasterClientContext.newBuilder(ClientContext.create(Configuration.global())) + .build()), + new Sessions(), blockStore, workerId); registry.add(BlockWorker.class, blockWorker); return blockWorker; } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java b/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java index e265f5e330cc..0c2bb7ebc9bc 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java @@ -92,7 +92,7 @@ public void submitRequest(CacheRequest request) long blockId = request.getBlockId(); boolean async = request.getAsync(); if (mActiveCacheRequests.putIfAbsent(blockId, request) != null) { - // This block is already planned and just just return. + // This block is already planned and just return. if (async) { LOG.debug("request already planned: {}", request); } else { @@ -124,6 +124,7 @@ public void submitRequest(CacheRequest request) // gRPC thread pool is drained due to highly concurrent caching workloads. In these cases, // return as async caching is at best effort. mNumRejected.incrementAndGet(); + CACHE_REJECTED_BLOCKS.inc(); SAMPLING_LOG.warn(String.format( "Failed to cache block locally as the thread pool is at capacity." + " To increase, update the parameter '%s'. numRejected: {} error: {}", @@ -205,15 +206,20 @@ public boolean equals(Object obj) { public Void call() throws IOException, AlluxioException { long blockId = mRequest.getBlockId(); long blockLength = mRequest.getLength(); - boolean result = false; + CacheResult result = CacheResult.FAILED; try { result = cacheBlock(mRequest); } finally { - if (result) { - CACHE_BLOCKS_SIZE.inc(blockLength); - CACHE_SUCCEEDED_BLOCKS.inc(); - } else { - CACHE_FAILED_BLOCKS.inc(); + switch (result) { + case SUCCEED: + CACHE_BLOCKS_SIZE.inc(blockLength); + CACHE_SUCCEEDED_BLOCKS.inc(); + break; + case FAILED: + CACHE_FAILED_BLOCKS.inc(); + break; + default: + break; } mActiveCacheRequests.remove(blockId); } @@ -221,8 +227,13 @@ public Void call() throws IOException, AlluxioException { } } - private boolean cacheBlock(CacheRequest request) throws IOException, AlluxioException { - boolean result; + enum CacheResult { + SUCCEED, FAILED, ALREADY_CACHED + } + + private CacheResult cacheBlock(CacheRequest request) throws IOException, AlluxioException { + CacheResult result; boolean isSourceLocal = NetworkAddressUtils.isLocalAddress(request.getSourceHost(), NETWORK_HOST_RESOLUTION_TIMEOUT); long blockId = request.getBlockId(); @@ -230,7 +241,7 @@ private boolean cacheBlock(CacheRequest request) throws IOException, AlluxioExce // Check if the block has already been cached on this worker if (mBlockWorker.getBlockStore().hasBlockMeta(blockId)) { LOG.debug("block already cached: {}", blockId); - return true; + return CacheResult.ALREADY_CACHED; } Protocol.OpenUfsBlockOptions openUfsBlockOptions = request.getOpenUfsBlockOptions(); // Depends on the request, cache the target block from different sources @@ -254,9 +265,9 @@ private boolean cacheBlock(CacheRequest request) throws IOException, AlluxioExce * @param blockId block ID * @param blockSize block size * @param
openUfsBlockOptions options to open the UFS file - * @return if the block is cached + * @return cache result */ - private boolean cacheBlockFromUfs(long blockId, long blockSize, + private CacheResult cacheBlockFromUfs(long blockId, long blockSize, Protocol.OpenUfsBlockOptions openUfsBlockOptions) throws IOException { try (BlockReader reader = mBlockWorker.createUfsBlockReader( Sessions.CACHE_UFS_SESSION_ID, blockId, 0, false, openUfsBlockOptions)) { @@ -271,7 +282,7 @@ private boolean cacheBlockFromUfs(long blockId, long blockSize, offset += bufferSize; } } - return true; + return CacheResult.SUCCEED; } /** @@ -281,15 +292,15 @@ private boolean cacheBlockFromUfs(long blockId, long blockSize, * @param blockSize block size * @param sourceAddress the source to read the block previously by client * @param openUfsBlockOptions options to open the UFS file - * @return if the block is cached + * @return cache result */ - private boolean cacheBlockFromRemoteWorker(long blockId, long blockSize, + private CacheResult cacheBlockFromRemoteWorker(long blockId, long blockSize, InetSocketAddress sourceAddress, Protocol.OpenUfsBlockOptions openUfsBlockOptions) throws IOException { if (mBlockWorker.getBlockStore().hasBlockMeta(blockId) || mBlockWorker.getBlockStore().hasTempBlockMeta(blockId)) { // It is already cached - return true; + return CacheResult.ALREADY_CACHED; } mBlockWorker.createBlock(Sessions.CACHE_WORKER_SESSION_ID, blockId, 0, new CreateBlockOptions(null, "", blockSize)); @@ -300,7 +311,7 @@ private boolean cacheBlockFromRemoteWorker(long blockId, long blockSize, .createBlockWriter(Sessions.CACHE_WORKER_SESSION_ID, blockId)) { BufferUtils.transfer(reader.getChannel(), writer.getChannel()); mBlockWorker.commitBlock(Sessions.CACHE_WORKER_SESSION_ID, blockId, false); - return true; + return CacheResult.SUCCEED; } catch (IllegalStateException | IOException e) { LOG.warn("Failed to async cache block {} from remote worker ({}) on copying the block: {}", blockId, sourceAddress, e.toString()); @@ -341,6 +352,8 @@ public RemoteBlockReader getRemoteBlockReader(long blockId, long blockSize, MetricsSystem.counter(MetricKey.WORKER_CACHE_REMOTE_BLOCKS.getName()); private static final Counter CACHE_SUCCEEDED_BLOCKS = MetricsSystem.counter(MetricKey.WORKER_CACHE_SUCCEEDED_BLOCKS.getName()); + private static final Counter CACHE_REJECTED_BLOCKS = + MetricsSystem.counter(MetricKey.WORKER_CACHE_REJECTED_BLOCKS.getName()); private static final Counter CACHE_UFS_BLOCKS = MetricsSystem.counter(MetricKey.WORKER_CACHE_UFS_BLOCKS.getName()); private static final Counter CACHE_BLOCKS_SIZE = diff --git a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java index cd08337e3b05..513a8664f9cf 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java @@ -38,6 +38,7 @@ import alluxio.grpc.GrpcService; import alluxio.grpc.ServiceType; import alluxio.grpc.UfsReadOptions; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatExecutor; import alluxio.heartbeat.HeartbeatThread; @@ -59,6 +60,7 @@ import alluxio.worker.grpc.GrpcExecutors; import alluxio.worker.page.PagedBlockStore; +import com.codahale.metrics.CachedGauge; import com.codahale.metrics.Counter; import com.google.common.annotations.VisibleForTesting; import 
com.google.common.base.Preconditions; @@ -76,6 +78,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import javax.annotation.concurrent.NotThreadSafe; import javax.annotation.concurrent.ThreadSafe; @@ -92,11 +95,10 @@ @NotThreadSafe public class DefaultBlockWorker extends AbstractWorker implements BlockWorker { private static final Logger LOG = LoggerFactory.getLogger(DefaultBlockWorker.class); - private static final long UFS_BLOCK_OPEN_TIMEOUT_MS = - Configuration.getMs(PropertyKey.WORKER_UFS_BLOCK_OPEN_TIMEOUT_MS); + public static final int CACHEGAUGE_UPDATE_INTERVAL = 5000; /** Used to close resources during stop. */ - private final Closer mResourceCloser = Closer.create(); + protected final Closer mResourceCloser = Closer.create(); /** * Block master clients. commitBlock is the only reason to keep a pool of block master clients * on each worker. We should either improve our RPC model in the master or get rid of the @@ -105,14 +107,14 @@ public class DefaultBlockWorker extends AbstractWorker implements BlockWorker { private final BlockMasterClientPool mBlockMasterClientPool; /** Client for all file system master communication. */ - private final FileSystemMasterClient mFileSystemMasterClient; + protected final FileSystemMasterClient mFileSystemMasterClient; /** Block store delta reporter for master heartbeat. */ private final BlockHeartbeatReporter mHeartbeatReporter; /** Session metadata, used to keep track of session heartbeats. */ private final Sessions mSessions; /** Block Store manager. */ - private final BlockStore mBlockStore; + protected final BlockStore mBlockStore; /** List of paths to always keep in memory. */ private final PrefixList mWhitelist; @@ -120,12 +122,12 @@ public class DefaultBlockWorker extends AbstractWorker implements BlockWorker { * The worker ID for this worker. This is initialized in {@link #start(WorkerNetAddress)} and may * be updated by the block sync thread if the master requests re-registration. */ - private final AtomicReference<Long> mWorkerId; + protected final AtomicReference<Long> mWorkerId; private final CacheRequestManager mCacheManager; private final FuseManager mFuseManager; - private WorkerNetAddress mAddress; + protected WorkerNetAddress mAddress; /** * Constructs a default block worker. @@ -140,7 +142,11 @@ public class DefaultBlockWorker extends AbstractWorker implements BlockWorker { public DefaultBlockWorker(BlockMasterClientPool blockMasterClientPool, FileSystemMasterClient fileSystemMasterClient, Sessions sessions, BlockStore blockStore, AtomicReference<Long> workerId) { - super(ExecutorServiceFactories.fixedThreadPool("block-worker-executor", 5)); + super( + Configuration.getBoolean(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS) + ?
ExecutorServiceFactories.cachedThreadPool("block-worker-executor") + : ExecutorServiceFactories.fixedThreadPool("block-worker-executor", 5) + ); mBlockMasterClientPool = mResourceCloser.register(blockMasterClientPool); mFileSystemMasterClient = mResourceCloser.register(fileSystemMasterClient); mHeartbeatReporter = new BlockHeartbeatReporter(); @@ -171,6 +177,11 @@ public BlockStore getBlockStore() { return mBlockStore; } + @Override + public WorkerNetAddress getWorkerAddress() { + return mAddress; + } + @Override public Set<Class<? extends Server>> getDependencies() { return new HashSet<>(); } @@ -209,19 +220,15 @@ public void start(WorkerNetAddress address) throws IOException { Preconditions.checkNotNull(mAddress, "mAddress"); // Setup BlockMasterSync - BlockMasterSync blockMasterSync = mResourceCloser - .register(new BlockMasterSync(this, mWorkerId, mAddress, mBlockMasterClientPool)); - getExecutorService() - .submit(new HeartbeatThread(HeartbeatContext.WORKER_BLOCK_SYNC, blockMasterSync, - (int) Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), - Configuration.global(), ServerUserState.global())); + setupBlockMasterSync(); // Setup PinListSyncer PinListSync pinListSync = mResourceCloser.register( new PinListSync(this, mFileSystemMasterClient)); getExecutorService() .submit(new HeartbeatThread(HeartbeatContext.WORKER_PIN_LIST_SYNC, pinListSync, - (int) Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS)), Configuration.global(), ServerUserState.global())); // Setup session cleaner @@ -234,7 +241,8 @@ public void start(WorkerNetAddress address) throws IOException { StorageChecker storageChecker = mResourceCloser.register(new StorageChecker()); getExecutorService() .submit(new HeartbeatThread(HeartbeatContext.WORKER_STORAGE_HEALTH, storageChecker, - (int) Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS)), Configuration.global(), ServerUserState.global())); } @@ -244,6 +252,16 @@ public void start(WorkerNetAddress address) throws IOException { } } + protected void setupBlockMasterSync() throws IOException { + BlockMasterSync blockMasterSync = mResourceCloser + .register(new BlockMasterSync(this, mWorkerId, mAddress, mBlockMasterClientPool)); + getExecutorService() + .submit(new HeartbeatThread(HeartbeatContext.WORKER_BLOCK_SYNC, blockMasterSync, + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS)), + Configuration.global(), ServerUserState.global())); + } + /** * Ask the master for a workerId. Should not be called outside of testing * @@ -328,7 +346,7 @@ public BlockWriter createBlockWriter(long sessionId, long blockId) @Override public BlockHeartbeatReport getReport() { - return mHeartbeatReporter.generateReport(); + return mHeartbeatReporter.generateReportAndClear(); } @Override @@ -485,8 +503,8 @@ public alluxio.wire.Configuration getConfiguration(GetConfigurationPOptions opti // NOTE(cc): assumes that Configuration is read-only when master is running, otherwise, // the following hash might not correspond to the above cluster configuration.
builder.setClusterConfHash(Configuration.hash()); + builder.setClusterConfLastUpdateTime(Configuration.getLastUpdateTime()); } - return builder.build(); } @@ -505,45 +523,54 @@ public void cleanupSession(long sessionId) { public static final class Metrics { public static final Counter WORKER_ACTIVE_CLIENTS = MetricsSystem.counter(MetricKey.WORKER_ACTIVE_CLIENTS.getName()); + public static final Counter WORKER_ACTIVE_OPERATIONS = + MetricsSystem.counter(MetricKey.WORKER_ACTIVE_OPERATIONS.getName()); /** * Registers metric gauges. * - * @param blockWorker the block worker handle + * @param blockWorker the BlockWorker */ public static void registerGauges(final BlockWorker blockWorker) { - MetricsSystem.registerGaugeIfAbsent( + CachedGauge<BlockWorkerMetrics> cache = + new CachedGauge<BlockWorkerMetrics>(CACHEGAUGE_UPDATE_INTERVAL, TimeUnit.MILLISECONDS) { + @Override + protected BlockWorkerMetrics loadValue() { + BlockStoreMeta meta = blockWorker.getStoreMetaFull(); + BlockWorkerMetrics metrics = BlockWorkerMetrics.from(meta, WORKER_STORAGE_TIER_ASSOC); + return metrics; + } + }; + MetricsSystem.registerCachedGaugeIfAbsent( MetricsSystem.getMetricName(MetricKey.WORKER_CAPACITY_TOTAL.getName()), - () -> blockWorker.getStoreMeta().getCapacityBytes()); + () -> cache.getValue().getCapacityBytes()); - MetricsSystem.registerGaugeIfAbsent( + MetricsSystem.registerCachedGaugeIfAbsent( MetricsSystem.getMetricName(MetricKey.WORKER_CAPACITY_USED.getName()), - () -> blockWorker.getStoreMeta().getUsedBytes()); + () -> cache.getValue().getUsedBytes()); - MetricsSystem.registerGaugeIfAbsent( + MetricsSystem.registerCachedGaugeIfAbsent( MetricsSystem.getMetricName(MetricKey.WORKER_CAPACITY_FREE.getName()), - () -> blockWorker.getStoreMeta().getCapacityBytes() - blockWorker.getStoreMeta() - .getUsedBytes()); + () -> cache.getValue().getCapacityFree()); for (int i = 0; i < WORKER_STORAGE_TIER_ASSOC.size(); i++) { String tier = WORKER_STORAGE_TIER_ASSOC.getAlias(i); // TODO(lu) Add template to dynamically generate MetricKey MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getMetricName( MetricKey.WORKER_CAPACITY_TOTAL.getName() + MetricInfo.TIER + tier), - () -> blockWorker.getStoreMeta().getCapacityBytesOnTiers().getOrDefault(tier, 0L)); + () -> cache.getValue().getCapacityBytesOnTiers().getOrDefault(tier, 0L)); - MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getMetricName( + MetricsSystem.registerCachedGaugeIfAbsent(MetricsSystem.getMetricName( MetricKey.WORKER_CAPACITY_USED.getName() + MetricInfo.TIER + tier), - () -> blockWorker.getStoreMeta().getUsedBytesOnTiers().getOrDefault(tier, 0L)); + () -> cache.getValue().getUsedBytesOnTiers().getOrDefault(tier, 0L)); - MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getMetricName( + MetricsSystem.registerCachedGaugeIfAbsent(MetricsSystem.getMetricName( MetricKey.WORKER_CAPACITY_FREE.getName() + MetricInfo.TIER + tier), - () -> blockWorker.getStoreMeta().getCapacityBytesOnTiers().getOrDefault(tier, 0L) - - blockWorker.getStoreMeta().getUsedBytesOnTiers().getOrDefault(tier, 0L)); + () -> cache.getValue().getFreeBytesOnTiers().getOrDefault(tier, 0L)); } - MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getMetricName( + MetricsSystem.registerCachedGaugeIfAbsent(MetricsSystem.getMetricName( MetricKey.WORKER_BLOCKS_CACHED.getName()), - () -> blockWorker.getStoreMetaFull().getNumberOfBlocks()); + () -> cache.getValue().getNumberOfBlocks()); } private Metrics() {} // prevent instantiation
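Design note: the gauges above now share one CachedGauge so that getStoreMetaFull(), which walks all block metadata, runs at most once per 5-second window regardless of how many gauges are polled. The general Dropwizard Metrics pattern, for reference (expensiveSnapshot is a hypothetical stand-in):

```java
import com.codahale.metrics.CachedGauge;
import java.util.concurrent.TimeUnit;

CachedGauge<Long> cached = new CachedGauge<Long>(5000, TimeUnit.MILLISECONDS) {
  @Override
  protected Long loadValue() {
    return expensiveSnapshot(); // recomputed at most once per interval
  }
};
// Multiple metric registrations can then read cached.getValue() cheaply.
```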
@@ -557,7 +584,7 @@ private Metrics() {} // prevent instantiation public final class StorageChecker implements HeartbeatExecutor { @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { try { mBlockStore.removeInaccessibleStorage(); } catch (Exception e) { diff --git a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java index 187cbdcd7cec..4ef1238dfd31 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java @@ -33,6 +33,7 @@ import alluxio.retry.RetryUtils; import alluxio.underfs.UfsManager; import alluxio.util.ThreadFactoryUtils; +import alluxio.worker.block.DefaultBlockWorker.Metrics; import alluxio.worker.block.io.BlockReader; import alluxio.worker.block.io.BlockWriter; import alluxio.worker.block.io.DelegatingBlockReader; @@ -53,6 +54,7 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; @@ -70,16 +72,20 @@ public class MonoBlockStore implements BlockStore { private final UnderFileSystemBlockStore mUnderFileSystemBlockStore; private final BlockMasterClientPool mBlockMasterClientPool; private final AtomicReference<Long> mWorkerId; + + private final List<BlockStoreEventListener> mBlockStoreEventListeners = + new CopyOnWriteArrayList<>(); + private final ScheduledExecutorService mDelayer = new ScheduledThreadPoolExecutor(1, ThreadFactoryUtils.build("LoadTimeOut", true)); /** * Constructor of MonoBlockStore. * - * @param localBlockStore - * @param blockMasterClientPool - * @param ufsManager - * @param workerId + * @param localBlockStore the local block store + * @param blockMasterClientPool a client pool for talking to the block master + * @param ufsManager the UFS manager + * @param workerId the worker id */ public MonoBlockStore(LocalBlockStore localBlockStore, BlockMasterClientPool blockMasterClientPool, @@ -118,6 +124,11 @@ public void commitBlock(long sessionId, long blockId, boolean pinOnCreate) { blockMasterClient.commitBlock(mWorkerId.get(), mLocalBlockStore.getBlockStoreMeta().getUsedBytesOnTiers().get(loc.tierAlias()), loc.tierAlias(), loc.mediumType(), blockId, meta.getBlockSize()); + for (BlockStoreEventListener listener : mBlockStoreEventListeners) { + synchronized (listener) { + listener.onCommitBlockToMaster(blockId, loc); + } + } } catch (AlluxioStatusException e) { throw AlluxioRuntimeException.from(e); } finally { @@ -148,19 +159,23 @@ public BlockReader createBlockReader(long sessionId, long blockId, long offset, boolean positionShort, Protocol.OpenUfsBlockOptions options) throws IOException { BlockReader reader; - Optional<BlockMeta> blockMeta = mLocalBlockStore.getVolatileBlockMeta(blockId); - if (blockMeta.isPresent()) { + // first try reading from Alluxio cache + try { reader = mLocalBlockStore.createBlockReader(sessionId, blockId, offset); - } else { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_CLIENTS.inc(); + return reader; + } catch (BlockDoesNotExistRuntimeException e) { + LOG.debug("Block {} does not exist in Alluxio cache: {}", blockId, e.getMessage()); + // the block does not exist in Alluxio, try loading from UFS boolean checkUfs = options != null && (options.hasUfsPath() || options.getBlockInUfsTier()); if (!checkUfs) { - throw new BlockDoesNotExistRuntimeException(blockId); + throw e; } // When the block does not exist in Alluxio but
exists in UFS, try to open the UFS block. reader = createUfsBlockReader(sessionId, blockId, offset, positionShort, options); + DefaultBlockWorker.Metrics.WORKER_ACTIVE_CLIENTS.inc(); + return reader; } - DefaultBlockWorker.Metrics.WORKER_ACTIVE_CLIENTS.inc(); - return reader; } @Override @@ -171,16 +186,19 @@ public BlockReader createUfsBlockReader(long sessionId, long blockId, long offse try { BlockReader reader = mUnderFileSystemBlockStore.createBlockReader(sessionId, blockId, offset, positionShort, options); - return new DelegatingBlockReader(reader, () -> closeUfsBlock(sessionId, blockId)); + BlockReader blockReader = new DelegatingBlockReader(reader, + () -> closeUfsBlock(sessionId, blockId, true)); + Metrics.WORKER_ACTIVE_CLIENTS.inc(); + return blockReader; } catch (Exception e) { try { - closeUfsBlock(sessionId, blockId); + closeUfsBlock(sessionId, blockId, false); } catch (Exception ee) { LOG.warn("Failed to close UFS block", ee); } String errorMessage = format("Failed to read from UFS, sessionId=%d, " + "blockId=%d, offset=%d, positionShort=%s, options=%s: %s", - sessionId, blockId, offset, positionShort, options, e); + sessionId, blockId, offset, positionShort, options, e.toString()); if (e instanceof FileNotFoundException) { throw new NotFoundException(errorMessage, e); } @@ -188,13 +206,17 @@ public BlockReader createUfsBlockReader(long sessionId, long blockId, long offse } } - private void closeUfsBlock(long sessionId, long blockId) + private void closeUfsBlock(long sessionId, long blockId, boolean successful) throws IOException { try { mUnderFileSystemBlockStore.closeBlock(sessionId, blockId); Optional<TempBlockMeta> tempBlockMeta = mLocalBlockStore.getTempBlockMeta(blockId); if (tempBlockMeta.isPresent() && tempBlockMeta.get().getSessionId() == sessionId) { - commitBlock(sessionId, blockId, false); + if (successful) { + commitBlock(sessionId, blockId, false); + } else { + abortBlock(sessionId, blockId); + } } else { // When getTempBlockMeta() return null, such as a block readType NO_CACHE writeType THROUGH. // Counter will not be decrement in the commitblock().
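This listener fan-out is what lets each per-master sync keep its own delta when workers register to all masters; BlockSyncMasterGroup (earlier in this diff) does exactly this per master:

```java
// One reporter per master: every commit/remove event on the block store is
// mirrored into every per-master heartbeat delta.
BlockHeartbeatReporter reporter = new BlockHeartbeatReporter();
blockWorker.getBlockStore().registerBlockStoreEventListener(reporter);
```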
@@ -266,6 +288,8 @@ public void updatePinnedInodes(Set<Long> inodes) { @Override public void registerBlockStoreEventListener(BlockStoreEventListener listener) { + LOG.debug("registerBlockStoreEventListener: listener={}", listener); + mBlockStoreEventListeners.add(listener); mLocalBlockStore.registerBlockStoreEventListener(listener); } @@ -307,7 +331,14 @@ public CompletableFuture<List<BlockStatus>> load(List<Block> blocks, UfsReadOpti handleException(e, block, errors, sessionId); continue; } - ByteBuffer buf = NioDirectBufferPool.acquire((int) blockSize); + ByteBuffer buf; + try { + buf = NioDirectBufferPool.acquire((int) blockSize, + new ExponentialBackoffRetry(1000, 5000, 5)); + } catch (Exception e) { + handleException(e, block, errors, sessionId); + continue; + } CompletableFuture future = RetryUtils.retryCallable("read from ufs", () -> manager.read(buf, block.getOffsetInFile(), blockSize, blockId, block.getUfsPath(), options), @@ -323,12 +354,12 @@ blockWriter.close(); } catch (IOException e) { throw AlluxioRuntimeException.from(e); - } finally { - NioDirectBufferPool.release(buf); } }) .thenRun(() -> commitBlock(sessionId, blockId, false)) + .thenRun(() -> NioDirectBufferPool.release(buf)) .exceptionally(t -> { + NioDirectBufferPool.release(buf); handleException(t.getCause(), block, errors, sessionId); return null; });
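Note on the release discipline above: the buffer is returned to the pool exactly once on either path, because `.thenRun` stages only execute when the prior stages completed normally, while `.exceptionally` only runs when an earlier stage failed (and the skipped `thenRun` never releases). A condensed sketch of the shape, assuming a prior `writeStage` future:

```java
writeStage
    .thenRun(() -> commitBlock(sessionId, blockId, false))
    .thenRun(() -> NioDirectBufferPool.release(buf))   // success path
    .exceptionally(t -> {
      NioDirectBufferPool.release(buf);                // failure path
      handleException(t.getCause(), block, errors, sessionId);
      return null;
    });
```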
diff --git a/core/server/worker/src/main/java/alluxio/worker/block/PinListSync.java b/core/server/worker/src/main/java/alluxio/worker/block/PinListSync.java index a85a50092a3c..67ac89a7357d 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/PinListSync.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/PinListSync.java @@ -47,7 +47,7 @@ public PinListSync(BlockWorker blockWorker, FileSystemMasterClient masterClient) } @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { // Send the sync try { Set<Long> pinList = mMasterClient.getPinList(); diff --git a/core/server/worker/src/main/java/alluxio/worker/block/RegisterStreamer.java b/core/server/worker/src/main/java/alluxio/worker/block/RegisterStreamer.java index 69ea65239817..33bf971e9730 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/RegisterStreamer.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/RegisterStreamer.java @@ -99,7 +99,35 @@ public RegisterStreamer( final Map<String, List<String>> lostStorage, final List<ConfigProperty> configList) { this(asyncClient, workerId, storageTierAliases, totalBytesOnTiers, usedBytesOnTiers, - lostStorage, configList, new BlockMapIterator(currentBlocksOnLocation)); + lostStorage, configList, new BlockMapIterator(currentBlocksOnLocation), + BuildVersion.newBuilder() + .setVersion(ProjectConstants.VERSION) + .setRevision(ProjectConstants.REVISION).build()); + } + + /** + * Constructor. + * + * @param asyncClient the grpc client + * @param workerId the worker ID + * @param storageTierAliases storage/tier setup from the configuration + * @param totalBytesOnTiers the capacity of each tier + * @param usedBytesOnTiers the current usage of each tier + * @param currentBlocksOnLocation the blocks in each tier/dir + * @param lostStorage the lost storage paths + * @param configList the configuration properties + * @param version the version info + */ + @VisibleForTesting + public RegisterStreamer( + final BlockMasterWorkerServiceGrpc.BlockMasterWorkerServiceStub asyncClient, + final long workerId, final List<String> storageTierAliases, + final Map<String, Long> totalBytesOnTiers, final Map<String, Long> usedBytesOnTiers, + final Map<BlockStoreLocation, List<Long>> currentBlocksOnLocation, + final Map<String, List<String>> lostStorage, + final List<ConfigProperty> configList, BuildVersion version) { + this(asyncClient, workerId, storageTierAliases, totalBytesOnTiers, usedBytesOnTiers, + lostStorage, configList, new BlockMapIterator(currentBlocksOnLocation), version); } /** @@ -115,24 +143,50 @@ public RegisterStreamer( * @param blockListIterator an iterator used to iterate the blocks */ public RegisterStreamer( + final BlockMasterWorkerServiceGrpc.BlockMasterWorkerServiceStub asyncClient, + final long workerId, final List<String> storageTierAliases, + final Map<String, Long> totalBytesOnTiers, final Map<String, Long> usedBytesOnTiers, + final Map<String, List<String>> lostStorage, + final List<ConfigProperty> configList, + BlockMapIterator blockListIterator) { + this(asyncClient, workerId, storageTierAliases, totalBytesOnTiers, usedBytesOnTiers, + lostStorage, configList, blockListIterator, + BuildVersion.newBuilder() + .setVersion(ProjectConstants.VERSION) + .setRevision(ProjectConstants.REVISION).build()); + } + + /** + * Constructor. + * + * @param asyncClient the grpc client + * @param workerId the worker ID + * @param storageTierAliases storage/tier setup from the configuration + * @param totalBytesOnTiers the capacity of each tier + * @param usedBytesOnTiers the current usage of each tier + * @param lostStorage the lost storage paths + * @param configList the configuration properties + * @param blockListIterator an iterator used to iterate the blocks + */ + private RegisterStreamer( final BlockMasterWorkerServiceGrpc.BlockMasterWorkerServiceStub asyncClient, final long workerId, final List<String> storageTierAliases, final Map<String, Long> totalBytesOnTiers, final Map<String, Long> usedBytesOnTiers, final Map<String, List<String>> lostStorage, final List<ConfigProperty> configList, - BlockMapIterator blockListIterator) { + BlockMapIterator blockListIterator, + BuildVersion buildVersion) { mAsyncClient = asyncClient; mWorkerId = workerId; mStorageTierAliases = storageTierAliases; mTotalBytesOnTiers = totalBytesOnTiers; mUsedBytesOnTiers = usedBytesOnTiers; - final BuildVersion buildVersion = BuildVersion.newBuilder() - .setVersion(ProjectConstants.VERSION) - .setRevision(ProjectConstants.REVISION) - .build(); mOptions = RegisterWorkerPOptions.newBuilder().addAllConfigs(configList) - .setBuildVersion(buildVersion).build(); + .setBuildVersion(buildVersion) + .setNumVCpu(Runtime.getRuntime().availableProcessors()) + .build(); + mLostStorageMap = lostStorage.entrySet().stream() .collect(Collectors.toMap(Map.Entry::getKey, e -> StorageList.newBuilder().addAllStorage(e.getValue()).build())); diff --git a/core/server/worker/src/main/java/alluxio/worker/block/SpecificMasterBlockSync.java b/core/server/worker/src/main/java/alluxio/worker/block/SpecificMasterBlockSync.java new file mode 100644 index 000000000000..660e0735c785 --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/SpecificMasterBlockSync.java @@
-0,0 +1,292 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +import alluxio.ProcessUtils; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.ConnectionFailedException; +import alluxio.exception.FailedToAcquireRegisterLeaseException; +import alluxio.grpc.Command; +import alluxio.heartbeat.HeartbeatExecutor; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.retry.ExponentialBackoffRetry; +import alluxio.retry.RetryPolicy; +import alluxio.util.CommonUtils; +import alluxio.wire.WorkerNetAddress; + +import com.codahale.metrics.Counter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.net.SocketAddress; +import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * The block master sync thread when workers are registered to all masters. + * With respect to behaviors, this implementation differs from {@link BlockMasterSync} in: + * 1. The registration takes place asynchronously and the caller can poll the registration state. + * We need to make the process async because when standby master read is enabled, workers have to + * register to all masters and these registrations can happen concurrently to speed up the process. + * 2. A registration failure doesn't throw a fatal exception. Instead, it retries endlessly. + * This is because a standby master registration failure + * should be a soft failure and can be retried later. + */ +@NotThreadSafe +public class SpecificMasterBlockSync implements HeartbeatExecutor, Closeable { + private static final Logger LOG = LoggerFactory.getLogger(SpecificMasterBlockSync.class); + private static final long ACQUIRE_LEASE_WAIT_MAX_DURATION = + Configuration.getMs(PropertyKey.WORKER_REGISTER_LEASE_RETRY_MAX_DURATION); + + private final long mWorkerBlockHeartbeatReportSizeThreshold = + Configuration.getInt(PropertyKey.WORKER_BLOCK_HEARTBEAT_REPORT_SIZE_THRESHOLD); + + private final SocketAddress mMasterAddress; + + /** + * The worker registration state. + * If the state is NOT_REGISTERED, heartbeat will trigger a registration. + * During the registration process, the state will be set to REGISTERING. + * When the registration is done, the state will be set to REGISTERED. + * When the sync receives a registration command from the master during the heartbeat, + * the state will be reset to NOT_REGISTERED and the sync will attempt to register it again + * in the next heartbeat. + */ + private volatile WorkerMasterRegistrationState mWorkerState = + WorkerMasterRegistrationState.NOT_REGISTERED; + + /** + * An async service to remove block. + */ + private final AsyncBlockRemover mAsyncBlockRemover; + + /** + * The worker ID for the worker. This may change if the master asks the worker to re-register. + */ + private final AtomicReference<Long> mWorkerId; + + /** + * Client for all master communication.
+ */ + private final BlockMasterClient mMasterClient; + + /** + * The net address of the worker. + */ + private final WorkerNetAddress mWorkerAddress; + + /** + * The helper instance for sync-related methods. + */ + private final BlockMasterSyncHelper mBlockMasterSyncHelper; + + /** + * The block worker responsible for interacting with Alluxio and UFS storage. + */ + private final BlockWorker mBlockWorker; + /** + * Last System.currentTimeMillis() timestamp when a heartbeat successfully completed. + */ + private long mLastSuccessfulHeartbeatMs = 0; + + private final BlockHeartbeatReporter mBlockHeartbeatReporter; + + /** + * Creates a new instance of {@link SpecificMasterBlockSync}. + * + * @param blockWorker the {@link BlockWorker} this syncer is updating to + * @param masterClient the block master client + * @param heartbeatReporter the heartbeat reporter + */ + public SpecificMasterBlockSync( + BlockWorker blockWorker, + BlockMasterClient masterClient, BlockHeartbeatReporter heartbeatReporter) + throws IOException { + mBlockWorker = blockWorker; + mWorkerId = blockWorker.getWorkerId(); + mWorkerAddress = blockWorker.getWorkerAddress(); + mMasterClient = masterClient; + mAsyncBlockRemover = new AsyncBlockRemover(mBlockWorker); + mBlockMasterSyncHelper = new BlockMasterSyncHelper(mMasterClient); + mMasterAddress = masterClient.getRemoteSockAddress(); + mBlockHeartbeatReporter = heartbeatReporter; + } + + private void registerWithMaster() { + RetryPolicy retry = createEndlessRetry(); + while (retry.attempt()) { + try { + LOG.info("Registering with master {}", mMasterAddress); + // The content in the report can be cleared because registration will + // report this block information anyway. + mBlockHeartbeatReporter.clear(); + registerWithMasterInternal(); + LOG.info("Finished registration with {}", mMasterAddress); + return; + } catch (Exception e) { + LOG.error("Failed to register with master {}, error {}, retry count {}. Will retry...", + mMasterAddress, e, retry.getAttemptCount()); + mWorkerState = WorkerMasterRegistrationState.NOT_REGISTERED; + } + } + // Should not reach here because the retry is indefinite + ProcessUtils.fatalError(LOG, new RuntimeException(), + "Failed to register with master %s", mMasterAddress); + } + + protected void registerWithMasterInternal() + throws IOException, FailedToAcquireRegisterLeaseException { + // The target master is not necessarily the one that allocated the workerID + LOG.info("Notify the master {} about the workerID {}", mMasterAddress, mWorkerId); + mMasterClient.notifyWorkerId(mWorkerId.get(), mWorkerAddress); + // TODO(elega) If worker registration to all masters happens at the same time, + // this might cause worker OOM issues because each block sync thread will hold a BlockStoreMeta + // instance during the registration. + // If this happens, consider limiting the worker registration concurrency, + // e.g. register the worker to masters one by one, as sketched below.
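Editorial note on the TODO above: one way to bound how many registrations hold a `BlockStoreMeta` snapshot at once is a simple permit-based limiter. The sketch below is illustrative only and not part of this patch; the class name, permit count, and `runThrottled` helper are all hypothetical stand-ins.

```java
import java.util.concurrent.Semaphore;

// Hypothetical sketch, not part of this change: bound how many block sync
// threads may build and stream a BlockStoreMeta snapshot at the same time,
// so that registering with many masters concurrently cannot exhaust memory.
public final class RegistrationThrottle {
  // Allow two concurrent registrations here for illustration; a real
  // implementation would likely read this from a configuration property.
  private static final Semaphore PERMITS = new Semaphore(2);

  public static void runThrottled(Runnable registration) throws InterruptedException {
    PERMITS.acquire();
    try {
      // builds the BlockStoreMeta snapshot and streams it to one master
      registration.run();
    } finally {
      PERMITS.release();
    }
  }
}
```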
+ BlockStoreMeta storeMeta = mBlockWorker.getStoreMetaFull(); + + try { + mBlockMasterSyncHelper.tryAcquireLease(mWorkerId.get(), storeMeta); + } catch (FailedToAcquireRegisterLeaseException e) { + if (Configuration.getBoolean(PropertyKey.TEST_MODE)) { + throw new RuntimeException(String.format("Master register lease timeout exceeded: %dms", + ACQUIRE_LEASE_WAIT_MAX_DURATION)); + } + throw e; + } + mWorkerState = WorkerMasterRegistrationState.REGISTERING; + mBlockMasterSyncHelper.registerToMaster(mWorkerId.get(), storeMeta); + + mWorkerState = WorkerMasterRegistrationState.REGISTERED; + Metrics.WORKER_MASTER_REGISTRATION_SUCCESS_COUNT.inc(); + mLastSuccessfulHeartbeatMs = CommonUtils.getCurrentMs(); + } + + private RetryPolicy createEndlessRetry() { + return new ExponentialBackoffRetry( + 1000, 60 * 1000, Integer.MAX_VALUE); + } + + @Override + public synchronized void heartbeat(long runLimit) throws InterruptedException { + if (mWorkerState == WorkerMasterRegistrationState.NOT_REGISTERED) { + // Not registered because: + // 1. The worker just started, we kick off the 1st registration here. + // 2. Master sends a registration command during + // the heartbeat and resets the registration state. (e.g. master restarted) + // 3. The heartbeat message becomes so big that we decide to fall back to a full re-register + LOG.info("The worker needs to register with master {}", mMasterAddress); + // This will retry indefinitely and essentially block here if the master is not ready + registerWithMaster(); + LOG.info("BlockMasterSync to master {} has started", mMasterAddress); + } + if (mWorkerState == WorkerMasterRegistrationState.REGISTERING) { + return; + } + + RetryPolicy endlessRetry = createEndlessRetry(); + while (endlessRetry.attempt()) { + BlockHeartbeatReport report = mBlockHeartbeatReporter.generateReportAndClear(); + boolean success = false; + try { + beforeHeartbeat(); + success = mBlockMasterSyncHelper.heartbeat( + mWorkerId.get(), report, + mBlockWorker.getStoreMeta(), this::handleMasterCommand); + } catch (Exception e) { + LOG.error("Failed to receive master heartbeat command. worker id {}", mWorkerId, e); + } + if (success) { + mLastSuccessfulHeartbeatMs = CommonUtils.getCurrentMs(); + break; + } else { + LOG.warn( + "Heartbeat failed, worker id {}, worker host {}, # of attempts {}, last success ts {}", + mWorkerId.get(), mWorkerAddress.getHost(), endlessRetry.getAttemptCount(), + mLastSuccessfulHeartbeatMs); + if (report.getBlockChangeCount() >= mWorkerBlockHeartbeatReportSizeThreshold) { + // If the report becomes too big, merging it back to the reporter might cause an OOM issue. + // We throw away the result and let the worker re-register with the master. + mWorkerState = WorkerMasterRegistrationState.NOT_REGISTERED; + return; + } else { + mBlockHeartbeatReporter.mergeBack(report); + } + } + } + } + + protected void beforeHeartbeat() { + } + + @Override + public void close() { + mAsyncBlockRemover.shutDown(); + mMasterClient.close(); + } + + /** + * @return if the worker has registered with the master successfully + */ + public boolean isRegistered() { + return mWorkerState == WorkerMasterRegistrationState.REGISTERED; + } + + /** + * Handles a master command. The command is one of Unknown, Nothing, Register, Free, or Delete. + * This call will block until the command is complete.
+ * + * @param cmd the command to execute + * @throws IOException if I/O errors occur + * @throws ConnectionFailedException if connection fails + */ + private void handleMasterCommand(Command cmd) throws IOException, ConnectionFailedException { + if (cmd == null) { + return; + } + switch (cmd.getCommandType()) { + // Currently unused + case Delete: + break; + // Master requests blocks to be removed from Alluxio managed space. + case Free: + mAsyncBlockRemover.addBlocksToDelete(cmd.getDataList()); + break; + // No action required + case Nothing: + break; + // Master requests re-registration + case Register: + mWorkerState = WorkerMasterRegistrationState.NOT_REGISTERED; + break; + // Unknown request + case Unknown: + LOG.error("Master heartbeat sends unknown command {}", cmd); + break; + default: + throw new RuntimeException("Unrecognized command from master " + cmd); + } + } + + /** + * Metrics. + */ + public static final class Metrics { + private static final Counter WORKER_MASTER_REGISTRATION_SUCCESS_COUNT + = MetricsSystem.counter(MetricKey.WORKER_MASTER_REGISTRATION_SUCCESS_COUNT.getName()); + } +} diff --git a/core/server/worker/src/main/java/alluxio/worker/block/TestSpecificMasterBlockSync.java b/core/server/worker/src/main/java/alluxio/worker/block/TestSpecificMasterBlockSync.java new file mode 100644 index 000000000000..d6f3761f9b10 --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/TestSpecificMasterBlockSync.java @@ -0,0 +1,82 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +import alluxio.exception.FailedToAcquireRegisterLeaseException; +import alluxio.exception.runtime.UnavailableRuntimeException; + +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicInteger; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * A test {@link SpecificMasterBlockSync} that adds some interfaces for testing. + */ +@NotThreadSafe +@VisibleForTesting +public final class TestSpecificMasterBlockSync extends SpecificMasterBlockSync { + private static final Logger LOG = LoggerFactory.getLogger(TestSpecificMasterBlockSync.class); + private volatile boolean mFailHeartbeat = false; + private final AtomicInteger mRegistrationSuccessCount = new AtomicInteger(0); + + /** + * Creates a new instance of {@link SpecificMasterBlockSync}. + * + * @param blockWorker the {@link BlockWorker} this syncer is updating to + * @param masterClient the block master client + * @param heartbeatReporter the heartbeat reporter + */ + public TestSpecificMasterBlockSync( + BlockWorker blockWorker, BlockMasterClient masterClient, + BlockHeartbeatReporter heartbeatReporter) throws IOException { + super(blockWorker, masterClient, heartbeatReporter); + } + + /** + * Restores the heartbeat. + */ + public void restoreHeartbeat() { + mFailHeartbeat = false; + } + + /** + * Fails the heartbeat and lets it throw an exception.
+ */ + public void failHeartbeat() { + mFailHeartbeat = true; + } + + /** + * @return registration success count + */ + public int getRegistrationSuccessCount() { + return mRegistrationSuccessCount.get(); + } + + @Override + protected void registerWithMasterInternal() + throws IOException, FailedToAcquireRegisterLeaseException { + super.registerWithMasterInternal(); + mRegistrationSuccessCount.incrementAndGet(); + } + + @Override + protected void beforeHeartbeat() { + if (mFailHeartbeat) { + throw new UnavailableRuntimeException("Heartbeat paused"); + } + } +} diff --git a/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java index b9806903f125..ce447d94abce 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java @@ -30,6 +30,8 @@ import alluxio.worker.block.io.BlockReader; import alluxio.worker.block.io.BlockWriter; import alluxio.worker.block.io.DelegatingBlockReader; +import alluxio.worker.block.io.LocalFileBlockReader; +import alluxio.worker.block.io.MetricAccountingBlockReader; import alluxio.worker.block.io.StoreBlockReader; import alluxio.worker.block.io.StoreBlockWriter; import alluxio.worker.block.management.DefaultStoreLoadTracker; @@ -48,6 +50,11 @@ import java.io.Closeable; import java.io.IOException; import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; import java.text.MessageFormat; import java.util.Collections; import java.util.HashSet; @@ -188,11 +195,39 @@ public BlockReader createBlockReader(long sessionId, long blockId, long offset) blockLock.close(); throw new BlockDoesNotExistRuntimeException(blockId); } + BlockMeta block = blockMeta.get(); try { - BlockReader reader = new StoreBlockReader(sessionId, blockMeta.get()); + validateBlockIntegrityForRead(block); + } catch (IllegalStateException validationError) { + LOG.warn("Block {} is corrupted, removing it: {}", + blockId, validationError.getMessage()); + // in case of a corrupted block, remove it and propagate the exception + // release the read lock because removeBlockInternal needs a write lock on the same block + blockLock.close(); + // at this point we are not holding any lock, so two threads may attempt to remove the same + // block concurrently. This is fine as long as removeBlockInternal is no-op for a + // non-existing block. 
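To make the idempotence assumption in the comment above concrete: concurrent removal of the same corrupted block is only safe if the loser of the race sees a clean no-op. A minimal illustrative sketch follows, using a ConcurrentHashMap as a stand-in for the block metadata manager; the names here are hypothetical, not Alluxio APIs.

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Illustrative sketch: two readers racing to remove the same corrupted block.
final class IdempotentRemovalSketch {
  private final Map<Long, String> mBlockPaths = new ConcurrentHashMap<>();

  void removeBlockInternal(long blockId) {
    // ConcurrentHashMap.remove returns null if another thread already removed
    // the entry, so a duplicate removal degenerates to a no-op.
    String path = mBlockPaths.remove(blockId);
    if (path == null) {
      return; // already removed concurrently; nothing left to do
    }
    // Deleting the backing file with a deleteIfExists-style call keeps the
    // file-system side of the removal idempotent as well.
  }
}
```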
+ try { + removeBlockInternal(sessionId, blockId, REMOVE_BLOCK_TIMEOUT_MS); + for (BlockStoreEventListener listener : mBlockStoreEventListeners) { + synchronized (listener) { + listener.onRemoveBlockByWorker(blockId); + listener.onRemoveBlock(blockId, block.getBlockLocation()); + } + } + } catch (Exception removeBlockError) { + LOG.warn("Failed to remove a corrupted block {}", blockId, removeBlockError); + validationError.addSuppressed(removeBlockError); + } + throw new BlockDoesNotExistRuntimeException(blockId, validationError); + } + + try { + LocalFileBlockReader reader = new StoreBlockReader(sessionId, block); ((FileChannel) reader.getChannel()).position(offset); accessBlock(sessionId, blockId); - return new DelegatingBlockReader(reader, blockLock); + BlockReader mareader = new MetricAccountingBlockReader(reader); + return new DelegatingBlockReader(mareader, blockLock); } catch (Exception e) { blockLock.close(); throw new IOException(format("Failed to get local block reader, sessionId=%d, " @@ -200,6 +235,57 @@ public BlockReader createBlockReader(long sessionId, long blockId, long offset) } } + /** + * Validates the integrity of the block for reading: + * 1. the block file should exist + * 2. the length of the block file should match its BlockMeta + * If any of the above does not hold, this can be a result of corrupted block files + * due to faulty storage hardware, manual manipulation of the block files by admin, + * or a bug where the block was prematurely committed when it was not done writing. + * + * @param blockMeta the block meta acquired from the metadata manager + * @throws IllegalStateException if the block is deemed corrupted + */ + public static void validateBlockIntegrityForRead(BlockMeta blockMeta) + throws IllegalStateException { + final long blockId = blockMeta.getBlockId(); + final Path blockPath = Paths.get(blockMeta.getPath()); + final BasicFileAttributes blockFileAttrs; + try { + blockFileAttrs = Files.readAttributes(blockPath, BasicFileAttributes.class); + } catch (NoSuchFileException e) { + throw new IllegalStateException(String.format( + "Block %s exists in block meta but actual physical block file %s does not exist", + blockId, blockPath)); + } catch (IOException e) { + // cannot read file attributes, possibly due to bad permission or bad file type + LOG.debug("Cannot read file attributes for block {}", blockId, e); + throw new IllegalStateException(String.format( + "Cannot read attributes of file %s for block %s during validation", blockPath, blockId)); + } + // need to check if file is a regular file, as for directories and device files the file length + // is unspecified + if (!blockFileAttrs.isRegularFile()) { + throw new IllegalStateException(String.format( + "Block file %s for block %s is not a regular file", blockPath, blockId)); + } + final long actualLength = blockFileAttrs.size(); + final long expectedLength = blockMeta.getBlockSize(); + // check if the actual file length matches the expected length from block meta + if (actualLength != expectedLength) { + LOG.debug("Block {} is expected to be {} bytes, " + + "but the actual block file length is {}", blockId, expectedLength, actualLength); + // Note: we only error out on 0-sized blocks, which are definitely not correct; + // if the size is not 0, we treat the block as valid + if (actualLength == 0) { + throw new IllegalStateException(String.format( + "Block %s exists in block meta but the size from block meta does not match that of " + + "the block file %s, expected block size = %d, actual block file length
= %d", + blockId, blockPath, expectedLength, actualLength)); + } + } + } + @Override public TempBlockMeta createBlock(long sessionId, long blockId, AllocateOptions options) { LOG.debug("createBlock: sessionId={}, blockId={}, options={}", sessionId, blockId, options); @@ -233,7 +319,7 @@ public void commitBlock(long sessionId, long blockId, boolean pinOnCreate) { BlockStoreLocation loc = commitBlockInternal(sessionId, blockId, pinOnCreate); for (BlockStoreEventListener listener : mBlockStoreEventListeners) { synchronized (listener) { - listener.onCommitBlock(blockId, loc); + listener.onCommitBlockToLocal(blockId, loc); } } } @@ -253,7 +339,7 @@ public BlockLock commitBlockLocked(long sessionId, long blockId, boolean pinOnCr } for (BlockStoreEventListener listener : mBlockStoreEventListeners) { synchronized (listener) { - listener.onCommitBlock(blockId, loc); + listener.onCommitBlockToLocal(blockId, loc); } } return lock; @@ -498,7 +584,8 @@ private void abortBlockInternal(long sessionId, long blockId) { * @param pinOnCreate is block pinned on create * @return destination location to move the block */ - private BlockStoreLocation commitBlockInternal(long sessionId, long blockId, + @VisibleForTesting + BlockStoreLocation commitBlockInternal(long sessionId, long blockId, boolean pinOnCreate) { if (mMetaManager.hasBlockMeta(blockId)) { LOG.debug("Block {} has been in block store, this could be a retry due to master-side RPC " @@ -658,7 +745,7 @@ private TempBlockMeta createBlockMetaInternal(long sessionId, long blockId, bool * TODO(ggezer): Remove synchronized. * * @param sessionId the session id - * @param minContiguousBytes the minimum amount of contigious free space in bytes + * @param minContiguousBytes the minimum amount of contiguous free space in bytes * @param minAvailableBytes the minimum amount of free space in bytes * @param location the location to free space */ @@ -819,7 +906,7 @@ private MoveBlockResult moveBlockInternal(long sessionId, long blockId, * @param blockMeta block metadata */ private void removeBlockFileAndMeta(BlockMeta blockMeta) { - FileUtils.delete(blockMeta.getPath()); + FileUtils.deleteIfExists(blockMeta.getPath()); mMetaManager.removeBlockMeta(blockMeta); } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/UfsIOManager.java b/core/server/worker/src/main/java/alluxio/worker/block/UfsIOManager.java index de999c75d748..9fac931c5cae 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/UfsIOManager.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/UfsIOManager.java @@ -125,7 +125,7 @@ private void schedule() { */ @VisibleForTesting public double getUsedThroughput(Meter meter) { - return meter.getOneMinuteRate(); + return meter.getOneMinuteRate() / 60; } /** @@ -203,6 +203,10 @@ private int readInternal() { int bytesRead = 0; InputStream inStream = null; try (CloseableResource ufsResource = mUfsClient.acquireUfsResource()) { + if (mOptions.hasUser()) { + // Before interacting with ufs manager, set the user. 
+ alluxio.security.authentication.AuthenticatedClientUser.set(mOptions.getUser()); + } inStream = mUfsInstreamCache.acquire(ufsResource.get(), mUfsPath, mFileId, OpenOptions.defaults().setOffset(mOffset) .setPositionShort(mOptions.getPositionShort())); diff --git a/core/server/worker/src/main/java/alluxio/worker/block/UnderFileSystemBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/UnderFileSystemBlockStore.java index 07df037ddc23..b52f428c78f3 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/UnderFileSystemBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/UnderFileSystemBlockStore.java @@ -387,11 +387,19 @@ public String toString() { } } - private static class BytesReadMetricKey { - private final AlluxioURI mUri; - private final String mUser; + /** + * A key for a bytes-read metric, composed of the UFS URI and the user. + */ + public static class BytesReadMetricKey { + public final AlluxioURI mUri; + public final String mUser; - BytesReadMetricKey(AlluxioURI uri, String user) { + /** + * Creates an instance of the key class. + * @param uri the UFS URI + * @param user the user name + */ + public BytesReadMetricKey(AlluxioURI uri, String user) { mUri = uri; mUser = user; } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/WorkerMasterRegistrationState.java b/core/server/worker/src/main/java/alluxio/worker/block/WorkerMasterRegistrationState.java new file mode 100644 index 000000000000..20066eb3d90b --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/WorkerMasterRegistrationState.java @@ -0,0 +1,21 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +/** + * The enum class for worker master registration state.
+ */ +public enum WorkerMasterRegistrationState { + REGISTERED, + NOT_REGISTERED, + REGISTERING, +} diff --git a/core/server/worker/src/main/java/alluxio/worker/block/annotator/DefaultBlockIterator.java b/core/server/worker/src/main/java/alluxio/worker/block/annotator/DefaultBlockIterator.java index 00f521fbc660..a0ffa791cdfb 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/annotator/DefaultBlockIterator.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/annotator/DefaultBlockIterator.java @@ -386,7 +386,7 @@ public void onAccessBlock(long blockId, BlockStoreLocation location) { } @Override - public void onCommitBlock(long blockId, BlockStoreLocation location) { + public void onCommitBlockToLocal(long blockId, BlockStoreLocation location) { blockUpdated(blockId, location); } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/evictor/LRUEvictor.java b/core/server/worker/src/main/java/alluxio/worker/block/evictor/LRUEvictor.java index 6074e4358f4f..172f424a3958 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/evictor/LRUEvictor.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/evictor/LRUEvictor.java @@ -82,7 +82,7 @@ public void onAccessBlock(long blockId) { } @Override - public void onCommitBlock(long blockId, BlockStoreLocation location) { + public void onCommitBlockToLocal(long blockId, BlockStoreLocation location) { // Since the temp block has been committed, update Evictor about the newly added blocks mLRUCache.put(blockId, UNUSED_MAP_VALUE); } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/io/MetricAccountingBlockReader.java b/core/server/worker/src/main/java/alluxio/worker/block/io/MetricAccountingBlockReader.java new file mode 100644 index 000000000000..1e58c31aef1f --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/io/MetricAccountingBlockReader.java @@ -0,0 +1,103 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block.io; + +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; + +import io.netty.buffer.ByteBuf; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ReadableByteChannel; + +/** + * A reader class with metrics. + */ +public class MetricAccountingBlockReader extends BlockReader { + private final LocalFileBlockReader mBlockReader; + + /** + * A decorator of BlockReader.
+ * @param mblockReader block reader + */ + public MetricAccountingBlockReader(LocalFileBlockReader mblockReader) { + mBlockReader = mblockReader; + } + + @Override + public ByteBuffer read(long offset, long length) throws IOException { + ByteBuffer buffer = mBlockReader.read(offset, length); + int bytesReadFromCache = buffer.limit() - buffer.position(); + MetricsSystem.counter(MetricKey.WORKER_BYTES_READ_CACHE.getName()).inc(bytesReadFromCache); + return buffer; + } + + @Override + public long getLength() { + return mBlockReader.getLength(); + } + + @Override + public ReadableByteChannel getChannel() { + return new ReadableByteChannel() { + private final ReadableByteChannel mDelegate = mBlockReader.getChannel(); + @Override + public int read(ByteBuffer dst) throws IOException { + int bytesRead = mDelegate.read(dst); + if (bytesRead != -1) { + MetricsSystem.counter(MetricKey.WORKER_BYTES_READ_CACHE.getName()).inc(bytesRead); + } + return bytesRead; + } + + @Override + public boolean isOpen() { + return mDelegate.isOpen(); + } + + @Override + public void close() throws IOException { + mDelegate.close(); + } + }; + } + + @Override + public int transferTo(ByteBuf buf) throws IOException { + int bytesReadFromCache = mBlockReader.transferTo(buf); + if (bytesReadFromCache != -1) { + MetricsSystem.counter(MetricKey.WORKER_BYTES_READ_CACHE.getName()).inc(bytesReadFromCache); + } + return bytesReadFromCache; + } + + @Override + public boolean isClosed() { + return mBlockReader.isClosed(); + } + + @Override + public String getLocation() { + return mBlockReader.getLocation(); + } + + @Override + public String toString() { + return mBlockReader.toString(); + } + + @Override + public void close() throws IOException { + mBlockReader.close(); + } +} diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/AbstractWriteHandler.java b/core/server/worker/src/main/java/alluxio/worker/grpc/AbstractWriteHandler.java index 8e6c6659bc85..8eff962e860c 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/AbstractWriteHandler.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/AbstractWriteHandler.java @@ -409,6 +409,9 @@ protected void handleCommand(WriteRequestCommand command, T context) throws Exce */ private void replySuccess() { mContext.setDoneUnsafe(true); + mContext.getContentHash().ifPresent(contentHash -> mResponseObserver.onNext( + WriteResponse.newBuilder().setContentHash(contentHash).setOffset( + mContext.getPos()).build())); mResponseObserver.onCompleted(); } diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/BlockReadHandler.java b/core/server/worker/src/main/java/alluxio/worker/grpc/BlockReadHandler.java index bf0cf4c5603e..af8310e262bb 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/BlockReadHandler.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/BlockReadHandler.java @@ -462,28 +462,29 @@ private void runInternal() { } continue; } - if (error != null) { + if (eof || cancel || error != null) { try { completeRequest(mContext); } catch (Exception e) { - LOG.error("Failed to close the request.", e); - } - replyError(error); - } else if (eof || cancel) { - try { - completeRequest(mContext); - } catch (Exception e) { - LogUtils.warnWithException(LOG, "Exception occurred while completing read request, " - + "EOF/CANCEL sessionId: {}. 
{}", mContext.getRequest().getSessionId(), - mContext.getRequest(), e); - setError(new Error(AlluxioStatusException.fromThrowable(e), true)); + if (error != null) { + LOG.error("Failed to close the request.", e); + } else { + LogUtils.warnWithException(LOG, "Exception occurred while completing read request, " + + "EOF/CANCEL sessionId: {}. {}", mContext.getRequest().getSessionId(), + mContext.getRequest(), e); + error = new Error(AlluxioStatusException.fromThrowable(e), true); + } } - if (eof) { + if (error != null) { + replyError(error); + } else if (eof) { replyEof(); - } else { + } else if (cancel) { replyCancel(); } } + // Leave `!mResponse.isReady() && tooManyPendingChunks()` unhandled + // since the reader is not finished in that case and needs more rounds } /** diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcDataServer.java b/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcDataServer.java index 53fe2cd4aa88..d92823619986 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcDataServer.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcDataServer.java @@ -28,7 +28,9 @@ import alluxio.util.network.NettyUtils; import alluxio.worker.DataServer; import alluxio.worker.WorkerProcess; +import alluxio.worker.block.DefaultBlockWorker; +import com.codahale.metrics.Counter; import io.netty.buffer.PooledByteBufAllocator; import io.netty.channel.ChannelOption; import io.netty.channel.EventLoopGroup; @@ -122,8 +124,9 @@ public GrpcDataServer(final String hostName, final SocketAddress bindAddress, private GrpcServerBuilder createServerBuilder(String hostName, SocketAddress bindAddress, ChannelType type) { // Create an executor for Worker RPC server. + final Counter clientCounter = DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS; mRPCExecutor = ExecutorServiceBuilder.buildExecutorService( - ExecutorServiceBuilder.RpcExecutorHost.WORKER); + ExecutorServiceBuilder.RpcExecutorHost.WORKER, clientCounter); MetricsSystem.registerGaugeIfAbsent(MetricKey.WORKER_RPC_QUEUE_LENGTH.getName(), mRPCExecutor::getRpcQueueLength); MetricsSystem.registerGaugeIfAbsent(MetricKey.WORKER_RPC_THREAD_ACTIVE_COUNT.getName(), diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcExecutors.java b/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcExecutors.java index 83c5ac90c941..d8f6fa1c6b00 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcExecutors.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcExecutors.java @@ -20,10 +20,17 @@ import alluxio.security.authentication.AuthenticatedClientUser; import alluxio.util.ThreadFactoryUtils; import alluxio.util.executor.UniqueBlockingQueue; +import alluxio.worker.block.DefaultBlockWorker; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collection; import java.util.List; import java.util.concurrent.AbstractExecutorService; +import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.SynchronousQueue; import java.util.concurrent.ThreadPoolExecutor; @@ -35,6 +42,7 @@ */ @ThreadSafe public final class GrpcExecutors { + private static final Logger LOG = LoggerFactory.getLogger(GrpcExecutors.class); private static final long THREAD_STOP_MS = Constants.SECOND_MS * 10; private static final int THREADS_MIN = 4; @@ -44,17 +52,23 @@ public final class GrpcExecutors { THREAD_STOP_MS, 
TimeUnit.MILLISECONDS, new UniqueBlockingQueue<>( Configuration.getInt(PropertyKey.WORKER_NETWORK_ASYNC_CACHE_MANAGER_QUEUE_MAX)), ThreadFactoryUtils.build("CacheManagerExecutor-%d", true)); + // Async caching is an optimization internal to Alluxio, which can be aborted any time public static final ExecutorService CACHE_MANAGER_EXECUTOR = - new ImpersonateThreadPoolExecutor(CACHE_MANAGER_THREAD_POOL_EXECUTOR); + new ImpersonateThreadPoolExecutor(CACHE_MANAGER_THREAD_POOL_EXECUTOR, false); + // Used by BlockWorkerClientServiceHandler.readBlock() by DataReader threads, + // where each DataReader reads a block content for reply. + // The thread pool queue is always empty. private static final ThreadPoolExecutor BLOCK_READER_THREAD_POOL_EXECUTOR = new ThreadPoolExecutor(THREADS_MIN, Configuration.getInt( PropertyKey.WORKER_NETWORK_BLOCK_READER_THREADS_MAX), THREAD_STOP_MS, TimeUnit.MILLISECONDS, new SynchronousQueue<>(), ThreadFactoryUtils.build("BlockDataReaderExecutor-%d", true)); public static final ExecutorService BLOCK_READER_EXECUTOR = - new ImpersonateThreadPoolExecutor(BLOCK_READER_THREAD_POOL_EXECUTOR); + new ImpersonateThreadPoolExecutor(BLOCK_READER_THREAD_POOL_EXECUTOR, true); + // Used for replying data to the client in BlockReadHandler. + // The thread pool has a small queue of a constant size. private static final ThreadPoolExecutor BLOCK_SERIALIZED_THREAD_POOL_EXECUTOR = new ThreadPoolExecutor(THREADS_MIN, Configuration.getInt(PropertyKey.WORKER_NETWORK_BLOCK_READER_THREADS_MAX), @@ -62,15 +76,16 @@ public final class GrpcExecutors { ThreadFactoryUtils.build("BlockDataReaderSerializedExecutor-%d", true), new ThreadPoolExecutor.CallerRunsPolicy()); public static final ExecutorService BLOCK_READER_SERIALIZED_RUNNER_EXECUTOR = - new ImpersonateThreadPoolExecutor(BLOCK_SERIALIZED_THREAD_POOL_EXECUTOR); + new ImpersonateThreadPoolExecutor(BLOCK_SERIALIZED_THREAD_POOL_EXECUTOR, true); + // Used for writing blocks. The queue is always empty. 
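Editorial illustration of the executor changes in the hunk that continues below: every tracked task is wrapped so that an active-operations counter is bumped on entry and released in a finally block, alongside thread-local user impersonation. This condensed sketch uses an AtomicLong in place of the Codahale counter and a plain ThreadLocal in place of AuthenticatedClientUser; these substitutions are assumptions for the example, not the actual classes.

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;

// Condensed sketch of the decorator idea behind ImpersonateThreadPoolExecutor.
final class TrackingExecutorSketch {
  private static final ThreadLocal<String> USER = new ThreadLocal<>();
  private final ExecutorService mDelegate = Executors.newFixedThreadPool(4);
  private final AtomicLong mActiveOps = new AtomicLong();

  void execute(Runnable command, boolean tracked, String proxyUser) {
    mDelegate.execute(() -> {
      if (tracked) {
        mActiveOps.incrementAndGet(); // task entered a tracked section
      }
      try {
        USER.set(proxyUser); // impersonate the caller on the pooled thread
        command.run();
      } finally {
        if (tracked) {
          mActiveOps.decrementAndGet(); // released even if the task throws
        }
        USER.remove(); // never leak one caller's identity to the next task
      }
    });
  }

  long activeOperations() {
    return mActiveOps.get();
  }
}
```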
private static final ThreadPoolExecutor BLOCK_WRITE_THREAD_POOL_EXECUTOR = new ThreadPoolExecutor(THREADS_MIN, Configuration.getInt( PropertyKey.WORKER_NETWORK_BLOCK_WRITER_THREADS_MAX), THREAD_STOP_MS, TimeUnit.MILLISECONDS, new SynchronousQueue<>(), ThreadFactoryUtils.build("BlockDataWriterExecutor-%d", true)); public static final ExecutorService BLOCK_WRITER_EXECUTOR = - new ImpersonateThreadPoolExecutor(BLOCK_WRITE_THREAD_POOL_EXECUTOR); + new ImpersonateThreadPoolExecutor(BLOCK_WRITE_THREAD_POOL_EXECUTOR, true); static { MetricsSystem.registerCachedGaugeIfAbsent(MetricsSystem.getMetricName( @@ -144,9 +159,11 @@ private GrpcExecutors() {} * */ private static class ImpersonateThreadPoolExecutor extends AbstractExecutorService { private final ExecutorService mDelegate; + private final boolean mTracked; - public ImpersonateThreadPoolExecutor(ExecutorService service) { + public ImpersonateThreadPoolExecutor(ExecutorService service, boolean tracked) { mDelegate = service; + mTracked = tracked; } @Override @@ -154,22 +171,128 @@ public void execute(final Runnable command) { // If there's no impersonation, proxyUser is just null User proxyUser = AuthenticatedClientUser.getOrNull(); mDelegate.execute(() -> { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.inc(); + } try { AuthenticatedClientUser.set(proxyUser); command.run(); } finally { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.dec(); + } AuthenticatedClientUser.remove(); } }); } + @Override + public <T> Future<T> submit(Callable<T> task) { + // If there's no impersonation, proxyUser is just null + User proxyUser = AuthenticatedClientUser.getOrNull(); + return mDelegate.submit(() -> { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.inc(); + } + try { + AuthenticatedClientUser.set(proxyUser); + return task.call(); + } finally { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.dec(); + } + AuthenticatedClientUser.remove(); + } + }); + } + + @Override + public <T> Future<T> submit(Runnable task, T result) { + // If there's no impersonation, proxyUser is just null + User proxyUser = AuthenticatedClientUser.getOrNull(); + return mDelegate.submit(() -> { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.inc(); + } + try { + AuthenticatedClientUser.set(proxyUser); + task.run(); + } finally { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.dec(); + } + AuthenticatedClientUser.remove(); + } + }, result); + } + + @Override + public Future<?> submit(Runnable task) { + // If there's no impersonation, proxyUser is just null + User proxyUser = AuthenticatedClientUser.getOrNull(); + return mDelegate.submit(() -> { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.inc(); + } + try { + AuthenticatedClientUser.set(proxyUser); + task.run(); + } finally { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.dec(); + } + AuthenticatedClientUser.remove(); + } + }); + } + + @Override + public <T> List<Future<T>> invokeAll(Collection<? extends Callable<T>> tasks) + throws InterruptedException { + // Not used. Also, keeping the active-operations counter correct here is hard, so we do not support it. + throw new UnsupportedOperationException("invokeAll(Collection) is not supported"); + } + + @Override + public <T> List<Future<T>> invokeAll(Collection<? extends Callable<T>> tasks, long timeout, + TimeUnit unit) throws InterruptedException { + // Not used.
Also, keeping the active-operations counter correct here is hard, so we do not support it. + throw new UnsupportedOperationException( + "invokeAll(Collection,long,TimeUnit) is not supported"); + } + + @Override + public <T> T invokeAny(Collection<? extends Callable<T>> tasks) { + // Not used. Also, keeping the active-operations counter correct here is hard, so we do not support it. + throw new UnsupportedOperationException("invokeAny(Collection) is not supported"); + } + + @Override + public <T> T invokeAny(Collection<? extends Callable<T>> tasks, long timeout, TimeUnit unit) { + // Not used. Also, keeping the active-operations counter correct here is hard, so we do not support it. + throw new UnsupportedOperationException( + "invokeAny(Collection,long,TimeUnit) is not supported"); + } + @Override + public void shutdown() { + long operationCount = DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.getCount(); + if (operationCount > 0) { + LOG.warn("{} operations have not completed at shutdown()", operationCount); + } mDelegate.shutdown(); } @Override public List<Runnable> shutdownNow() { + long operationCount = DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.getCount(); + if (operationCount > 0) { + LOG.warn("{} operations have not completed at shutdownNow()", operationCount); + } return mDelegate.shutdownNow(); } diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/ShortCircuitBlockReadHandler.java b/core/server/worker/src/main/java/alluxio/worker/grpc/ShortCircuitBlockReadHandler.java index 4b7608fab229..6988c5aa63cf 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/ShortCircuitBlockReadHandler.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/ShortCircuitBlockReadHandler.java @@ -27,6 +27,7 @@ import alluxio.worker.block.BlockStore; import alluxio.worker.block.BlockStoreLocation; import alluxio.worker.block.DefaultBlockWorker; +import alluxio.worker.block.TieredBlockStore; import alluxio.worker.block.meta.BlockMeta; import io.grpc.stub.StreamObserver; @@ -86,6 +87,13 @@ public OpenLocalBlockResponse call() throws Exception { if (!meta.isPresent()) { throw new BlockDoesNotExistRuntimeException(mRequest.getBlockId()); } + try { + // assuming the underlying BlockStore is TieredBlockStore, as it's the only impl + // that allows short-circuit read + TieredBlockStore.validateBlockIntegrityForRead(meta.get()); + } catch (IllegalStateException validationError) { + throw new BlockDoesNotExistRuntimeException(mRequest.getBlockId(), validationError); + } if (mRequest.getPromote()) { // TODO(calvin): Move this logic into BlockStore#moveBlockInternal if possible // Because the move operation is expensive, we first check if the operation is necessary diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/UfsFileWriteHandler.java b/core/server/worker/src/main/java/alluxio/worker/grpc/UfsFileWriteHandler.java index 32a574bda853..4cf496483640 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/UfsFileWriteHandler.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/UfsFileWriteHandler.java @@ -22,6 +22,7 @@ import alluxio.resource.CloseableResource; import alluxio.security.authentication.AuthenticatedUserInfo; import alluxio.security.authorization.Mode; +import alluxio.underfs.ContentHashable; import alluxio.underfs.UfsManager; import alluxio.underfs.UnderFileSystem; import alluxio.underfs.options.CreateOptions; @@ -83,6 +84,14 @@ protected void completeRequest(UfsFileWriteRequestContext context) } Preconditions.checkState(context.getOutputStream() != null); context.getOutputStream().close(); + if (context.getOutputStream() instanceof ContentHashable) { + try { + ((ContentHashable)
context.getOutputStream()).getContentHash() + .ifPresent(context::setContentHash); + } catch (IOException e) { + LOG.warn("Error getting content hash after completing file", e); + } + } CreateOptions createOptions = context.getCreateOptions(); if (createOptions != null) { try { diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/WriteRequestContext.java b/core/server/worker/src/main/java/alluxio/worker/grpc/WriteRequestContext.java index f36652daa8a0..c271b049d4ff 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/WriteRequestContext.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/WriteRequestContext.java @@ -14,6 +14,7 @@ import com.codahale.metrics.Counter; import com.codahale.metrics.Meter; +import java.util.Optional; import javax.annotation.Nullable; import javax.annotation.concurrent.GuardedBy; import javax.annotation.concurrent.ThreadSafe; @@ -43,6 +44,8 @@ public class WriteRequestContext { */ private long mPos; + private String mContentHash = null; + private Counter mCounter; private Meter mMeter; @@ -65,6 +68,20 @@ public T getRequest() { return mRequest; } + /** + * @return the content hash + */ + public Optional<String> getContentHash() { + return Optional.ofNullable(mContentHash); + } + + /** + * @param contentHash the content hash of the written file + */ + public void setContentHash(String contentHash) { + mContentHash = contentHash; + } + /** * @return the error */ diff --git a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockReader.java b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockReader.java index a0a09d2b8b41..41b7a95d4016 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockReader.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockReader.java @@ -39,7 +39,6 @@ */ @NotThreadSafe public class PagedBlockReader extends BlockReader { - private static final ByteBuffer EMPTY_BYTE_BUFFER = ByteBuffer.allocate(0); private final long mPageSize; private final CacheManager mCacheManager; @@ -122,6 +121,7 @@ private long read(ByteBuf byteBuf, long offset, long length) throws IOException bytesRead += bytesReadFromCache; MetricsSystem.meter(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE.getName()).mark(bytesRead); mReadFromLocalCache = true; + MetricsSystem.counter(MetricKey.WORKER_BYTES_READ_CACHE.getName()).inc(bytesReadFromCache); } else { if (!mUfsBlockReader.isPresent()) { throw new AlluxioRuntimeException( diff --git a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java index 90837874c864..7fd540fb63f7 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java @@ -16,14 +16,19 @@ import alluxio.client.file.cache.CacheManager; import alluxio.client.file.cache.CacheManagerOptions; +import alluxio.client.file.cache.PageId; +import alluxio.client.file.cache.PageInfo; import alluxio.client.file.cache.store.PageStoreDir; import alluxio.conf.AlluxioConfiguration; import alluxio.conf.Configuration; import alluxio.exception.BlockAlreadyExistsException; import alluxio.exception.ExceptionMessage; +import alluxio.exception.PageNotFoundException; import alluxio.exception.runtime.AlluxioRuntimeException; import alluxio.exception.runtime.AlreadyExistsRuntimeException; import alluxio.exception.runtime.BlockDoesNotExistRuntimeException; +import
alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.exception.status.DeadlineExceededException; import alluxio.grpc.Block; import alluxio.grpc.BlockStatus; import alluxio.grpc.ErrorType; @@ -62,6 +67,7 @@ import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; /** @@ -85,12 +91,13 @@ public class PagedBlockStore implements BlockStore { private final List<BlockStoreEventListener> mBlockStoreEventListeners = new CopyOnWriteArrayList<>(); private final long mPageSize; + private static final Long REMOVE_BLOCK_TIMEOUT_MS = 60_000L; /** * Create an instance of PagedBlockStore. - * @param ufsManager - * @param pool - * @param workerId + * @param ufsManager the UFS manager + * @param pool a client pool for talking to the block master + * @param workerId the worker id * @return an instance of PagedBlockStore */ public static PagedBlockStore create(UfsManager ufsManager, BlockMasterClientPool pool, @@ -166,7 +173,19 @@ public void commitBlock(long sessionId, long blockId, boolean pinOnCreate) { pageStoreDir.commit(BlockPageId.tempFileIdOf(blockId), BlockPageId.fileIdOf(blockId, blockMeta.getBlockSize())); final PagedBlockMeta committed = mPageMetaStore.commit(blockId); + BlockStoreLocation blockLocation = + new BlockStoreLocation(DEFAULT_TIER, getDirIndexOfBlock(blockId)); + for (BlockStoreEventListener listener : mBlockStoreEventListeners) { + synchronized (listener) { + listener.onCommitBlockToLocal(blockId, blockLocation); + } + } commitBlockToMaster(committed); + for (BlockStoreEventListener listener : mBlockStoreEventListeners) { + synchronized (listener) { + listener.onCommitBlockToMaster(blockId, blockLocation); + } + } } catch (IOException e) { throw AlluxioRuntimeException.from(e); } finally { @@ -195,13 +214,6 @@ private void commitBlockToMaster(PagedBlockMeta blockMeta) { } finally { mBlockMasterClientPool.release(bmc); } - BlockStoreLocation blockLocation = - new BlockStoreLocation(DEFAULT_TIER, getDirIndexOfBlock(blockId)); - for (BlockStoreEventListener listener : mBlockStoreEventListeners) { - synchronized (listener) { - listener.onCommitBlock(blockId, blockLocation); - } - } } @Override @@ -288,7 +300,7 @@ private BlockReader getBlockReader(PagedBlockMeta blockMeta, long offset, } final Optional<PagedUfsBlockReader> ufsBlockReader = readOptions.map(opt -> new PagedUfsBlockReader( - mUfsManager, mUfsInStreamCache, blockMeta, offset, opt, mPageSize)); + mUfsManager, mUfsInStreamCache, blockMeta, offset, opt, mPageSize)); return new PagedBlockReader(mCacheManager, blockMeta, offset, ufsBlockReader, mPageSize); } @@ -365,34 +377,31 @@ public BlockWriter createBlockWriter(long sessionId, long blockId) String.format("Cannot overwrite an existing block %d", blockId))); } + /** + * Returns the state of the underlying cache manager, exposed for CommitTest.
+ * @return the cache manager state, e.g. READ_ONLY, READ_WRITE, or NOT_IN_USE + */ + public CacheManager.State getCacheManagerState() { + return mCacheManager.state(); + } + @Override public void moveBlock(long sessionId, long blockId, AllocateOptions moveOptions) throws IOException { // TODO(bowen): implement actual move and replace placeholder values - int dirIndex = getDirIndexOfBlock(blockId); - BlockStoreLocation srcLocation = new BlockStoreLocation(DEFAULT_TIER, dirIndex); - BlockStoreLocation destLocation = moveOptions.getLocation(); - for (BlockStoreEventListener listener : mBlockStoreEventListeners) { - synchronized (listener) { - listener.onMoveBlockByClient(blockId, srcLocation, destLocation); - } - } - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException("moveBlock"); } @Override public void removeBlock(long sessionId, long blockId) throws IOException { LOG.debug("removeBlock: sessionId={}, blockId={}", sessionId, blockId); - // TODO(bowen): implement actual removal and replace placeholder values - boolean removeSuccess = true; int dirIndex = getDirIndexOfBlock(blockId); + removeBlockInternal(sessionId, blockId, REMOVE_BLOCK_TIMEOUT_MS); for (BlockStoreEventListener listener : mBlockStoreEventListeners) { synchronized (listener) { listener.onRemoveBlockByClient(blockId); - if (removeSuccess) { - BlockStoreLocation removedFrom = new BlockStoreLocation(DEFAULT_TIER, dirIndex); - listener.onRemoveBlock(blockId, removedFrom); - } + BlockStoreLocation removedFrom = new BlockStoreLocation(DEFAULT_TIER, dirIndex); + listener.onRemoveBlock(blockId, removedFrom); } } } @@ -494,4 +503,35 @@ private int getDirIndexOfBlock(long blockId) { .getDir() .getDirIndex(); } + + private void removeBlockInternal(long sessionId, long blockId, long timeoutMs) + throws IOException { + Optional<BlockLock> optionalLock = + mLockManager.tryAcquireBlockLock(sessionId, blockId, BlockLockType.WRITE, + timeoutMs, TimeUnit.MILLISECONDS); + if (!optionalLock.isPresent()) { + throw new DeadlineExceededException( + String.format("Cannot acquire lock to remove block %d for session %d after %d ms", + blockId, sessionId, timeoutMs)); + } + try (BlockLock blockLock = optionalLock.get()) { + Set<PageId> pageIds; + try (LockResource metaLock = new LockResource(mPageMetaStore.getLock().writeLock())) { + if (mPageMetaStore.hasTempBlock(blockId)) { + throw new IllegalStateException( + ExceptionMessage.REMOVE_UNCOMMITTED_BLOCK.getMessage(blockId)); + } + pageIds = mPageMetaStore.getBlock(blockId) + .orElseThrow(() -> new BlockDoesNotExistRuntimeException(blockId)) + .getDir().getBlockPages(blockId); + + for (PageId pageId : pageIds) { + PageInfo pageInfo = mPageMetaStore.removePage(pageId); + pageInfo.getLocalCacheDir().getPageStore().delete(pageId); + } + } + } catch (PageNotFoundException e) { + throw new NotFoundRuntimeException("Page not found: " + e.getMessage(), e); + } + } } diff --git a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStoreDir.java b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStoreDir.java index d1ca6ad31dfc..b5d232651200 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStoreDir.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStoreDir.java @@ -14,6 +14,7 @@ import static alluxio.worker.page.PagedBlockStoreMeta.DEFAULT_MEDIUM; import static alluxio.worker.page.PagedBlockStoreMeta.DEFAULT_TIER; +import alluxio.client.file.cache.PageId; import alluxio.client.file.cache.PageInfo; import
alluxio.client.file.cache.PageStore; import alluxio.client.file.cache.store.PageStoreDir; @@ -265,4 +266,15 @@ public long getTempBlockCachedBytes(long blockId) { public int getBlockCachedPages(long blockId) { return mBlockToPagesMap.get(blockId).size(); } + + /** + * Gets pages in this block. + * + * @param blockId the block id + * @return pages in this block being cached + */ + public Set<PageId> getBlockPages(long blockId) { + return mBlockToPagesMap.get(blockId).stream().map(PageInfo::getPageId) + .collect(Collectors.toSet()); + } } diff --git a/core/server/worker/src/main/java/alluxio/worker/page/PagedUfsBlockReader.java b/core/server/worker/src/main/java/alluxio/worker/page/PagedUfsBlockReader.java index ef79f18348da..dd4d132a4bef 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/PagedUfsBlockReader.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/PagedUfsBlockReader.java @@ -12,6 +12,12 @@ package alluxio.worker.page; import alluxio.conf.PropertyKey; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.exception.status.NotFoundException; +import alluxio.exception.status.UnavailableException; +import alluxio.metrics.MetricInfo; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; import alluxio.network.protocol.databuffer.NioDirectBufferPool; import alluxio.resource.CloseableResource; import alluxio.underfs.UfsManager; @@ -19,9 +25,11 @@ import alluxio.underfs.options.OpenOptions; import alluxio.util.IdUtils; import alluxio.worker.block.UfsInputStreamCache; +import alluxio.worker.block.UnderFileSystemBlockStore.BytesReadMetricKey; import alluxio.worker.block.io.BlockReader; import alluxio.worker.block.meta.BlockMeta; +import com.codahale.metrics.Counter; import com.google.common.base.Preconditions; import io.netty.buffer.ByteBuf; @@ -31,6 +39,8 @@ import java.nio.channels.Channels; import java.nio.channels.ClosedChannelException; import java.nio.channels.ReadableByteChannel; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; /** * Block reader that reads from UFS. @@ -47,6 +57,9 @@ public class PagedUfsBlockReader extends BlockReader { private long mLastPageIndex = -1; private boolean mClosed = false; private long mPosition; + private final ConcurrentMap<BytesReadMetricKey, Counter> mUfsBytesReadMetrics = + new ConcurrentHashMap<>(); + private final Counter mUfsBytesRead; /** * @param ufsManager @@ -70,6 +83,23 @@ public PagedUfsBlockReader(UfsManager ufsManager, mInitialOffset = offset; mLastPage = ByteBuffer.allocateDirect((int) mPageSize); mPosition = offset; + try { + UfsManager.UfsClient ufsClient = mUfsManager.get(mUfsBlockOptions.getMountId()); + mUfsBytesRead = mUfsBytesReadMetrics.computeIfAbsent( + new BytesReadMetricKey(ufsClient.getUfsMountPointUri(), mUfsBlockOptions.getUser()), + key -> key.mUser == null + ?
MetricsSystem.counterWithTags( + MetricKey.WORKER_BYTES_READ_UFS.getName(), + MetricKey.WORKER_BYTES_READ_UFS.isClusterAggregated(), + MetricInfo.TAG_UFS, MetricsSystem.escape(key.mUri)) + : MetricsSystem.counterWithTags( + MetricKey.WORKER_BYTES_READ_UFS.getName(), + MetricKey.WORKER_BYTES_READ_UFS.isClusterAggregated(), + MetricInfo.TAG_UFS, MetricsSystem.escape(key.mUri), + MetricInfo.TAG_USER, key.mUser)); + } catch (UnavailableException | NotFoundException e) { + throw AlluxioRuntimeException.from(e); + } } @Override @@ -145,6 +175,7 @@ public int readPageAtIndex(ByteBuffer buffer, long pageIndex) throws IOException mLastPage.flip(); mLastPageIndex = pageIndex; fillWithCachedPage(buffer, pageIndex * mPageSize, totalBytesRead); + mUfsBytesRead.inc(totalBytesRead); return totalBytesRead; } diff --git a/core/server/worker/src/main/java/alluxio/worker/page/UfsBlockReadOptions.java b/core/server/worker/src/main/java/alluxio/worker/page/UfsBlockReadOptions.java index 4156c63334cb..d6b43a4c197e 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/UfsBlockReadOptions.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/UfsBlockReadOptions.java @@ -16,6 +16,7 @@ import com.google.common.base.Preconditions; import java.util.Objects; +import javax.annotation.Nullable; /** * Options for reading a block from UFS. @@ -26,12 +27,15 @@ public final class UfsBlockReadOptions { private final long mOffsetInFile; private final String mUfsPath; private final boolean mCacheIntoAlluxio; + @Nullable private final String mUser; - UfsBlockReadOptions(long mountId, long offsetInFile, String ufsPath, boolean cacheIntoAlluxio) { + UfsBlockReadOptions(long mountId, long offsetInFile, String ufsPath, boolean cacheIntoAlluxio, + @Nullable String user) { mMountId = mountId; mOffsetInFile = offsetInFile; mUfsPath = ufsPath; mCacheIntoAlluxio = cacheIntoAlluxio; + mUser = user; } /** @@ -47,7 +51,7 @@ public static UfsBlockReadOptions fromProto(Protocol.OpenUfsBlockOptions options "missing offset in file for UFS block read"); Preconditions.checkArgument(options.hasUfsPath(), "missing UFS path for UFS block read"); return new UfsBlockReadOptions(options.getMountId(), - options.getOffsetInFile(), options.getUfsPath(), !options.getNoCache()); + options.getOffsetInFile(), options.getUfsPath(), !options.getNoCache(), options.getUser()); } /** @@ -71,6 +75,12 @@ public String getUfsPath() { return mUfsPath; } + /** + * + * @return user + */ + public String getUser() { return mUser; } + /** * @return whether the UFS block should be cached into Alluxio */ diff --git a/core/server/worker/src/test/java/alluxio/worker/block/AllMasterRegistrationBlockWorkerTest.java b/core/server/worker/src/test/java/alluxio/worker/block/AllMasterRegistrationBlockWorkerTest.java new file mode 100644 index 000000000000..44e118bdbc23 --- /dev/null +++ b/core/server/worker/src/test/java/alluxio/worker/block/AllMasterRegistrationBlockWorkerTest.java @@ -0,0 +1,75 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.worker.block; + +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import alluxio.Sessions; +import alluxio.conf.PropertyKey; +import alluxio.master.journal.JournalType; + +import org.junit.Test; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Unit tests for {@link DefaultBlockWorker}. + */ +public class AllMasterRegistrationBlockWorkerTest extends DefaultBlockWorkerTestBase { + @Override + public void before() throws Exception { + mConfigurationRule.set(PropertyKey.WORKER_MASTER_CONNECT_RETRY_TIMEOUT, "5s"); + mConfigurationRule.set(PropertyKey.TEST_MODE, true); + mConfigurationRule.set(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS, true); + mConfigurationRule.set(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.EMBEDDED); + mConfigurationRule.set(PropertyKey.MASTER_RPC_ADDRESSES, + "localhost:19998,localhost:19988,localhost:19978"); + super.before(); + + when(mFileSystemMasterClient.getRemoteSockAddress()) + .thenReturn(InetSocketAddress.createUnresolved("localhost", 19998)); + + mBlockWorker = new AllMasterRegistrationBlockWorker( + mBlockMasterClientPool, mFileSystemMasterClient, + mock(Sessions.class), mBlockStore, new AtomicReference<>(INVALID_WORKER_ID)); + BlockSyncMasterGroup.setBlockMasterClientFactory( + new BlockSyncMasterGroup.BlockMasterClientFactory() { + @Override + BlockMasterClient create(InetSocketAddress address) { + return mBlockMasterClient; + } + }); + } + + @Test + public void workerMasterRegistrationFailed() throws IOException { + doThrow(new RuntimeException("error")).when(mBlockMasterClient).registerWithStream( + anyLong(), any(), any(), any(), any(), any(), any()); + Exception e = assertThrows(Exception.class, () -> mBlockWorker.start(WORKER_ADDRESS)); + assertTrue(e.getMessage().contains("Fatal error: Failed to register with primary master")); + } + + @Test + public void workerMasterRegistration() throws IOException { + mBlockWorker.start(WORKER_ADDRESS); + } + + // TODO(elega) add a test to confirm the worker can start when the registration to a standby fails +} diff --git a/core/server/worker/src/test/java/alluxio/worker/block/BlockHeartbeatReporterTest.java b/core/server/worker/src/test/java/alluxio/worker/block/BlockHeartbeatReporterTest.java index a04a7e4042a3..ebdcaa799a34 100644 --- a/core/server/worker/src/test/java/alluxio/worker/block/BlockHeartbeatReporterTest.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/BlockHeartbeatReporterTest.java @@ -16,10 +16,16 @@ import static org.junit.Assert.assertTrue; import alluxio.Constants; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import com.google.common.collect.ImmutableMap; import org.junit.Before; import org.junit.Test; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; @@ -40,6 +46,7 @@ public final class BlockHeartbeatReporterTest { */ @Before public final void before() { + Configuration.set(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS, true); mReporter = new BlockHeartbeatReporter(); } @@ -54,18 +61,18 @@ private void removeBlock(long blockId) { } /** - * Tests the {@link BlockHeartbeatReporter#generateReport()} method for 
an empty report. + */ @Test public void generateReportEmpty() { - BlockHeartbeatReport report = mReporter.generateReport(); + BlockHeartbeatReport report = mReporter.generateReportAndClear(); assertTrue(report.getAddedBlocks().isEmpty()); assertTrue(report.getRemovedBlocks().isEmpty()); } /** - * Tests the {@link BlockHeartbeatReporter#generateReport()} method to correctly generate a report - * after moving block. + * Tests the {@link BlockHeartbeatReporter#generateReportAndClear()} + * method to correctly generate a report after moving a block. */ @Test public void generateReportMove() { @@ -75,7 +82,7 @@ moveBlock(block1, MEM_LOC); moveBlock(block2, SSD_LOC); moveBlock(block3, HDD_LOC); - BlockHeartbeatReport report = mReporter.generateReport(); + BlockHeartbeatReport report = mReporter.generateReportAndClear(); Map<BlockStoreLocation, List<Long>> addedBlocks = report.getAddedBlocks(); // Block1 moved to memory @@ -95,8 +102,8 @@ } /** - * Tests the {@link BlockHeartbeatReporter#generateReport()} method that generating a report - * clears the state of the reporter. + * Tests the {@link BlockHeartbeatReporter#generateReportAndClear()} + * method that generating a report clears the state of the reporter. */ @Test public void generateReportStateClear() { @@ -104,18 +111,18 @@ moveBlock(block1, MEM_LOC); // First report should have updates - BlockHeartbeatReport report = mReporter.generateReport(); + BlockHeartbeatReport report = mReporter.generateReportAndClear(); assertFalse(report.getAddedBlocks().isEmpty()); // Second report should not have updates - BlockHeartbeatReport nextReport = mReporter.generateReport(); + BlockHeartbeatReport nextReport = mReporter.generateReportAndClear(); assertTrue(nextReport.getAddedBlocks().isEmpty()); assertTrue(nextReport.getRemovedBlocks().isEmpty()); } /** - * Tests the {@link BlockHeartbeatReporter#generateReport()} method to correctly generate a report - * after removing blocks. + * Tests the {@link BlockHeartbeatReporter#generateReportAndClear()} + * method to correctly generate a report after removing blocks. */ @Test public void generateReportRemove() { @@ -125,7 +132,7 @@ removeBlock(block1); removeBlock(block2); removeBlock(block3); - BlockHeartbeatReport report = mReporter.generateReport(); + BlockHeartbeatReport report = mReporter.generateReportAndClear(); // All blocks should be removed List<Long> removedBlocks = report.getRemovedBlocks(); @@ -140,8 +147,8 @@ } /** - * Tests the {@link BlockHeartbeatReporter#generateReport()} method to correctly generate a report - * after moving a block and the removing it. + * Tests the {@link BlockHeartbeatReporter#generateReportAndClear()} + * method to correctly generate a report after moving a block and then removing it.
*/ @Test public void generateReportMoveThenRemove() { @@ -150,7 +157,7 @@ removeBlock(block1); // The block should not be in the added blocks list - BlockHeartbeatReport report = mReporter.generateReport(); + BlockHeartbeatReport report = mReporter.generateReportAndClear(); assertEquals(null, report.getAddedBlocks().get(MEM_LOC)); // The block should be in the removed blocks list @@ -158,4 +165,49 @@ assertEquals(1, removedBlocks.size()); assertTrue(removedBlocks.contains(block1)); } + + @Test + public void generateAndRevert() { + mReporter.onMoveBlockByWorker(1, MEM_LOC, SSD_LOC); + mReporter.onMoveBlockByWorker(2, MEM_LOC, SSD_LOC); + mReporter.onMoveBlockByWorker(3, SSD_LOC, HDD_LOC); + mReporter.onRemoveBlockByClient(4); + mReporter.onStorageLost(Constants.MEDIUM_MEM, "/foo"); + mReporter.onStorageLost(Constants.MEDIUM_MEM, "/bar"); + BlockHeartbeatReport originalReport = mReporter.generateReportAndClear(); + mReporter.mergeBack(originalReport); + BlockHeartbeatReport newReport = mReporter.generateReportAndClear(); + assertEquals(originalReport.getAddedBlocks(), newReport.getAddedBlocks()); + assertEquals(originalReport.getRemovedBlocks(), newReport.getRemovedBlocks()); + assertEquals(originalReport.getLostStorage(), newReport.getLostStorage()); + } + + @Test + public void generateUpdateThenRevert() { + mReporter.onMoveBlockByWorker(1, HDD_LOC, MEM_LOC); + mReporter.onMoveBlockByWorker(2, HDD_LOC, MEM_LOC); + mReporter.onMoveBlockByWorker(3, HDD_LOC, SSD_LOC); + mReporter.onRemoveBlockByClient(4); + mReporter.onStorageLost(Constants.MEDIUM_MEM, "/foo"); + mReporter.onStorageLost(Constants.MEDIUM_HDD, "/bar"); + BlockHeartbeatReport originalReport = mReporter.generateReportAndClear(); + + mReporter.onRemoveBlockByClient(1); + mReporter.onRemoveBlockByClient(3); + mReporter.onRemoveBlockByClient(5); + mReporter.onMoveBlockByWorker(6, SSD_LOC, HDD_LOC); + mReporter.onMoveBlockByWorker(7, HDD_LOC, MEM_LOC); + mReporter.onStorageLost(Constants.MEDIUM_MEM, "/baz"); + mReporter.mergeBack(originalReport); + BlockHeartbeatReport newReport = mReporter.generateReportAndClear(); + + assertEquals(ImmutableMap.of( + MEM_LOC, Arrays.asList(7L, 2L), + HDD_LOC, Collections.singletonList(6L) + ), newReport.getAddedBlocks()); + assertEquals(new HashSet<>(Arrays.asList(1L, 3L, 4L, 5L)), + new HashSet<>(newReport.getRemovedBlocks())); + assertEquals(2, newReport.getLostStorage().get(Constants.MEDIUM_MEM).size()); + assertEquals(1, newReport.getLostStorage().get(Constants.MEDIUM_HDD).size()); + } } diff --git a/core/server/worker/src/test/java/alluxio/worker/block/BlockWorkerMetricsTest.java b/core/server/worker/src/test/java/alluxio/worker/block/BlockWorkerMetricsTest.java index 0ca981e5af98..972c9e4050b3 100644 --- a/core/server/worker/src/test/java/alluxio/worker/block/BlockWorkerMetricsTest.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/BlockWorkerMetricsTest.java @@ -52,19 +52,23 @@ public void before() throws Exception { } @Test - public void testMetricsCapacity() { + public void testMetricsCapacity() throws InterruptedException { when(mBlockStoreMeta.getCapacityBytes()).thenReturn(1000L); Assert.assertEquals(1000L, getGauge(MetricKey.WORKER_CAPACITY_TOTAL.getName())); when(mBlockStoreMeta.getUsedBytes()).thenReturn(200L); + // sleep before asserting: the gauge registered here is a CachedGauge + // whose update interval is 5 seconds + 
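// Sleeping for that full interval ensures the cached value has expired before the assertions below read it. + 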
Thread.sleep(DefaultBlockWorker.CACHEGAUGE_UPDATE_INTERVAL); Assert.assertEquals(200L, getGauge(MetricKey.WORKER_CAPACITY_USED.getName())); Assert.assertEquals(800L, getGauge(MetricKey.WORKER_CAPACITY_FREE.getName())); } @Test - public void testMetricsTierCapacity() { + public void testMetricsTierCapacity() throws InterruptedException { when(mBlockStoreMeta.getCapacityBytesOnTiers()) .thenReturn(ImmutableMap.of(MEM, 1000L, HDD, 2000L)); when(mBlockStoreMeta.getUsedBytesOnTiers()).thenReturn(ImmutableMap.of(MEM, 100L, HDD, 200L)); + Thread.sleep(DefaultBlockWorker.CACHEGAUGE_UPDATE_INTERVAL); assertEquals(1000L, getGauge(MetricKey.WORKER_CAPACITY_TOTAL.getName() + MetricInfo.TIER + MEM)); assertEquals(2000L, diff --git a/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTest.java b/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTest.java index 1d4edc018c8a..fb33d9c8be1f 100644 --- a/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTest.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTest.java @@ -11,7 +11,6 @@ package alluxio.worker.block; -import static alluxio.util.CommonUtils.waitFor; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -19,23 +18,14 @@ import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.anyMap; import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; -import alluxio.AlluxioURI; -import alluxio.ConfigurationRule; import alluxio.Constants; -import alluxio.Sessions; import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; import alluxio.exception.runtime.AlluxioRuntimeException; import alluxio.exception.runtime.BlockDoesNotExistRuntimeException; import alluxio.exception.runtime.ResourceExhaustedRuntimeException; @@ -44,35 +34,20 @@ import alluxio.exception.status.UnavailableException; import alluxio.grpc.Block; import alluxio.grpc.BlockStatus; -import alluxio.grpc.CacheRequest; -import alluxio.grpc.Command; -import alluxio.grpc.CommandType; import alluxio.grpc.GetConfigurationPOptions; import alluxio.grpc.UfsReadOptions; -import alluxio.master.NoopUfsManager; import alluxio.proto.dataserver.Protocol; -import alluxio.underfs.UfsManager; -import alluxio.underfs.UnderFileSystemConfiguration; import alluxio.util.IdUtils; -import alluxio.util.WaitForOptions; import alluxio.util.io.BufferUtils; -import alluxio.util.network.NetworkAddressUtils; -import alluxio.wire.WorkerNetAddress; import alluxio.worker.block.io.BlockReader; import alluxio.worker.block.io.BlockWriter; import alluxio.worker.block.meta.TempBlockMeta; -import alluxio.worker.file.FileSystemMasterClient; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import org.junit.Before; -import org.junit.Rule; import org.junit.Test; -import org.junit.rules.TemporaryFolder; import java.io.BufferedOutputStream; -import java.io.File; import 
java.io.FileOutputStream; import java.io.IOException; import java.nio.ByteBuffer; @@ -81,118 +56,13 @@ import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.Set; import java.util.concurrent.ExecutionException; -import java.util.concurrent.atomic.AtomicReference; /** * Unit tests for {@link DefaultBlockWorker}. */ -public class DefaultBlockWorkerTest { - private static final int BLOCK_SIZE = 128; - - TieredBlockStore mTieredBlockStore; - // worker configurations - private static final long WORKER_ID = 30L; - // ufs for fallback read - private static final long UFS_MOUNT_ID = 1L; - // ufs for batch load - private static final long UFS_LOAD_MOUNT_ID = 2L; - private static final WorkerNetAddress WORKER_ADDRESS = - new WorkerNetAddress().setHost("localhost").setRpcPort(20001); - - // invalid initial worker id - private static final long INVALID_WORKER_ID = -1L; - - // test subject - private DefaultBlockWorker mBlockWorker; - - // mocked dependencies of DefaultBlockWorker - private BlockMasterClient mBlockMasterClient; - private FileSystemMasterClient mFileSystemMasterClient; - - private final Random mRandom = new Random(); - - @Rule - public TemporaryFolder mTestFolder = new TemporaryFolder(); - // worker's local storage directories - private String mMemDir; - private String mHddDir; - // ufs file for fallback read - private File mTestUfsFile; - - // ufs root path for batch load - private String mRootUfs; - // ufs file for batch load - private String mTestLoadFilePath; - - @Rule - public ConfigurationRule mConfigurationRule = - new ConfigurationRule(new ImmutableMap.Builder() - .put(PropertyKey.WORKER_TIERED_STORE_LEVELS, 2) - .put(PropertyKey.WORKER_TIERED_STORE_LEVEL0_ALIAS, Constants.MEDIUM_MEM) - .put(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_MEDIUMTYPE, Constants.MEDIUM_MEM) - .put(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_QUOTA, "1GB") - .put(PropertyKey.WORKER_TIERED_STORE_LEVEL1_ALIAS, Constants.MEDIUM_HDD) - .put(PropertyKey.WORKER_TIERED_STORE_LEVEL1_DIRS_MEDIUMTYPE, Constants.MEDIUM_HDD) - .put(PropertyKey.WORKER_TIERED_STORE_LEVEL1_DIRS_QUOTA, "2GB") - .put(PropertyKey.WORKER_RPC_PORT, 0) - .put(PropertyKey.WORKER_MANAGEMENT_TIER_ALIGN_RESERVED_BYTES, "0") - .put(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS, "10ms") - .build(), Configuration.modifiableGlobal()); - private BlockStore mBlockStore; - - /** - * Sets up all dependencies before a test runs. 
- */ - @Before - public void before() throws Exception { - // set up storage directories - mMemDir = mTestFolder.newFolder().getAbsolutePath(); - mHddDir = mTestFolder.newFolder().getAbsolutePath(); - mConfigurationRule.set(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_PATH, mMemDir); - mConfigurationRule.set(PropertyKey.WORKER_TIERED_STORE_LEVEL1_DIRS_PATH, mHddDir); - - // set up BlockMasterClient - mBlockMasterClient = createMockBlockMasterClient(); - BlockMasterClientPool blockMasterClientPool = spy(new BlockMasterClientPool()); - doReturn(mBlockMasterClient).when(blockMasterClientPool).createNewResource(); - - mTieredBlockStore = spy(new TieredBlockStore()); - UfsManager ufsManager = new NoopUfsManager(); - AtomicReference workerId = new AtomicReference<>(INVALID_WORKER_ID); - mBlockStore = - spy(new MonoBlockStore(mTieredBlockStore, blockMasterClientPool, ufsManager, workerId)); - - mFileSystemMasterClient = createMockFileSystemMasterClient(); - - Sessions sessions = mock(Sessions.class); - - // set up a ufs directory for batch load jobs - mRootUfs = mTestFolder.newFolder("DefaultBlockWorkerTest").getAbsolutePath(); - mConfigurationRule.set(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS, mRootUfs); - ufsManager.addMount(UFS_LOAD_MOUNT_ID, - new AlluxioURI(mRootUfs), - UnderFileSystemConfiguration.defaults(Configuration.global())); - // Write an actual file to UFS - mTestLoadFilePath = mTestFolder.newFile("temp").getAbsolutePath(); - byte[] buffer = BufferUtils.getIncreasingByteArray((int) (BLOCK_SIZE * 1.5)); - BufferUtils.writeBufferToFile(mTestLoadFilePath, buffer); - - // set up ufs directory for fallback reading - mTestUfsFile = mTestFolder.newFile(); - // mount test file to UFS_MOUNT_ID - ufsManager.addMount( - UFS_MOUNT_ID, - new AlluxioURI(mTestUfsFile.getAbsolutePath()), - UnderFileSystemConfiguration.defaults(Configuration.global()) - ); - - mBlockWorker = new DefaultBlockWorker(blockMasterClientPool, mFileSystemMasterClient, - sessions, mBlockStore, workerId); - } - +public class DefaultBlockWorkerTest extends DefaultBlockWorkerTestBase { @Test public void getWorkerId() throws Exception { mBlockWorker.askForWorkerId(WORKER_ADDRESS); @@ -394,9 +264,9 @@ public void getStoreMeta() throws Exception { assertEquals(1, storeMeta.getBlockList().get("HDD").size()); Map> blockLocations = storeMeta.getBlockListByStorageLocation(); assertEquals(1, blockLocations.get( - new BlockStoreLocation("MEM", 0, "MEM")).size()); + new BlockStoreLocation("MEM", 0, "MEM")).size()); assertEquals(1, blockLocations.get( - new BlockStoreLocation("HDD", 0, "HDD")).size()); + new BlockStoreLocation("HDD", 0, "HDD")).size()); assertEquals(2, storeMeta.getNumberOfBlocks()); } @@ -637,102 +507,4 @@ public void cleanUpSession() throws Exception { // now another session should be able to grab write lock on the block mBlockWorker.removeBlock(anotherSessionId, blockId); } - - private void cacheBlock(boolean async) throws Exception { - // flush 1MB random data to ufs so that caching will take a while - long ufsBlockSize = 1024 * 1024; - byte[] data = new byte[(int) ufsBlockSize]; - mRandom.nextBytes(data); - - try (FileOutputStream fileOut = new FileOutputStream(mTestUfsFile); - BufferedOutputStream bufOut = new BufferedOutputStream(fileOut)) { - bufOut.write(data); - bufOut.flush(); - } - - // ufs options: delegate to the ufs mounted at UFS_MOUNT_ID - // with path to our test file - long blockId = mRandom.nextLong(); - Protocol.OpenUfsBlockOptions options = Protocol.OpenUfsBlockOptions - .newBuilder() - 
.setBlockSize(ufsBlockSize) - .setUfsPath(mTestUfsFile.getAbsolutePath()) - .setMountId(UFS_MOUNT_ID) - .setNoCache(false) - .setOffsetInFile(0) - .build(); - - // cache request: - // delegate to local ufs client rather than remote worker - CacheRequest request = CacheRequest - .newBuilder() - .setSourceHost(NetworkAddressUtils.getLocalHostName(500)) - .setBlockId(blockId) - .setLength(ufsBlockSize) - .setAsync(async) - .setOpenUfsBlockOptions(options) - .build(); - - mBlockWorker.cache(request); - - // check that the block metadata is present - if (async) { - assertFalse(mBlockWorker.getBlockStore().hasBlockMeta(blockId)); - waitFor( - "Wait for async cache", - () -> mBlockWorker.getBlockStore().hasBlockMeta(blockId), - WaitForOptions.defaults().setInterval(10).setTimeoutMs(2000)); - } else { - assertTrue(mBlockWorker.getBlockStore().hasBlockMeta(blockId)); - } - - long sessionId = mRandom.nextLong(); - // check that we can read the block locally - // note: this time we use an OpenUfsOption without ufsPath and blockInUfsTier so - // that the worker can't fall back to ufs read. - Protocol.OpenUfsBlockOptions noFallbackOptions = Protocol.OpenUfsBlockOptions.newBuilder() - .setBlockInUfsTier(false).build(); - try (BlockReader reader = mBlockWorker.createBlockReader( - sessionId, blockId, 0, false, noFallbackOptions)) { - ByteBuffer buf = reader.read(0, ufsBlockSize); - // alert: LocalFileBlockReader uses a MappedByteBuffer, which does not - // support the array operation. So we need to compare ByteBuffer manually - assertEquals(0, buf.compareTo(ByteBuffer.wrap(data))); - } - } - - // create a BlockMasterClient that simulates reasonable default - // interactions with the block master - private BlockMasterClient createMockBlockMasterClient() throws Exception { - BlockMasterClient client = mock(BlockMasterClient.class); - - // return designated worker id - doReturn(WORKER_ID) - .when(client) - .getId(any(WorkerNetAddress.class)); - - // return Command.Nothing for heartbeat - doReturn(Command.newBuilder().setCommandType(CommandType.Nothing).build()) - .when(client) - .heartbeat( - anyLong(), - anyMap(), - anyMap(), - anyList(), - anyMap(), - anyMap(), - anyList() - ); - return client; - } - - // create a mocked FileSystemMasterClient that simulates reasonable default - // interactions with file system master - private FileSystemMasterClient createMockFileSystemMasterClient() throws Exception { - FileSystemMasterClient client = mock(FileSystemMasterClient.class); - doReturn(ImmutableSet.of()) - .when(client) - .getPinList(); - return client; - } } diff --git a/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTestBase.java b/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTestBase.java new file mode 100644 index 000000000000..b7b3d53c5191 --- /dev/null +++ b/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTestBase.java @@ -0,0 +1,264 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.worker.block; + +import static alluxio.util.CommonUtils.waitFor; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; + +import alluxio.AlluxioURI; +import alluxio.ConfigurationRule; +import alluxio.Constants; +import alluxio.Sessions; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.CacheRequest; +import alluxio.grpc.Command; +import alluxio.grpc.CommandType; +import alluxio.master.NoopUfsManager; +import alluxio.proto.dataserver.Protocol; +import alluxio.underfs.UfsManager; +import alluxio.underfs.UnderFileSystemConfiguration; +import alluxio.util.WaitForOptions; +import alluxio.util.io.BufferUtils; +import alluxio.util.network.NetworkAddressUtils; +import alluxio.wire.WorkerNetAddress; +import alluxio.worker.block.io.BlockReader; +import alluxio.worker.file.FileSystemMasterClient; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.junit.Before; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.nio.ByteBuffer; +import java.util.Random; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Test base for block worker related tests. + */ +public class DefaultBlockWorkerTestBase { + protected static final int BLOCK_SIZE = 128; + + TieredBlockStore mTieredBlockStore; + // worker configurations + protected static final long WORKER_ID = 30L; + // ufs for fallback read + protected static final long UFS_MOUNT_ID = 1L; + // ufs for batch load + protected static final long UFS_LOAD_MOUNT_ID = 2L; + protected static final WorkerNetAddress WORKER_ADDRESS = + new WorkerNetAddress().setHost("localhost").setRpcPort(20001); + + // invalid initial worker id + protected static final long INVALID_WORKER_ID = -1L; + + // test subject + protected DefaultBlockWorker mBlockWorker; + + // mocked dependencies of DefaultBlockWorker + protected BlockMasterClient mBlockMasterClient; + protected FileSystemMasterClient mFileSystemMasterClient; + + protected final Random mRandom = new Random(); + + @Rule + public TemporaryFolder mTestFolder = new TemporaryFolder(); + // worker's local storage directories + protected String mMemDir; + protected String mHddDir; + // ufs file for fallback read + protected File mTestUfsFile; + + // ufs root path for batch load + protected String mRootUfs; + // ufs file for batch load + protected String mTestLoadFilePath; + protected BlockMasterClientPool mBlockMasterClientPool; + + @Rule + public ConfigurationRule mConfigurationRule = + new ConfigurationRule(new ImmutableMap.Builder() + .put(PropertyKey.WORKER_TIERED_STORE_LEVELS, 2) + .put(PropertyKey.WORKER_TIERED_STORE_LEVEL0_ALIAS, Constants.MEDIUM_MEM) + .put(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_MEDIUMTYPE, Constants.MEDIUM_MEM) + .put(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_QUOTA, "1GB") + .put(PropertyKey.WORKER_TIERED_STORE_LEVEL1_ALIAS, Constants.MEDIUM_HDD) + .put(PropertyKey.WORKER_TIERED_STORE_LEVEL1_DIRS_MEDIUMTYPE, Constants.MEDIUM_HDD) + 
.put(PropertyKey.WORKER_TIERED_STORE_LEVEL1_DIRS_QUOTA, "2GB") + .put(PropertyKey.WORKER_RPC_PORT, 0) + .put(PropertyKey.WORKER_MANAGEMENT_TIER_ALIGN_RESERVED_BYTES, "0") + .put(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS, "10ms") + .build(), Configuration.modifiableGlobal()); + protected BlockStore mBlockStore; + + /** + * Sets up all dependencies before a test runs. + */ + @Before + public void before() throws Exception { + // set up storage directories + mMemDir = mTestFolder.newFolder().getAbsolutePath(); + mHddDir = mTestFolder.newFolder().getAbsolutePath(); + mConfigurationRule.set(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_PATH, mMemDir); + mConfigurationRule.set(PropertyKey.WORKER_TIERED_STORE_LEVEL1_DIRS_PATH, mHddDir); + + // set up BlockMasterClient + mBlockMasterClient = createMockBlockMasterClient(); + mBlockMasterClientPool = spy(new BlockMasterClientPool()); + doReturn(mBlockMasterClient).when(mBlockMasterClientPool).createNewResource(); + + mTieredBlockStore = spy(new TieredBlockStore()); + UfsManager ufsManager = new NoopUfsManager(); + AtomicReference workerId = new AtomicReference<>(INVALID_WORKER_ID); + mBlockStore = + spy(new MonoBlockStore(mTieredBlockStore, mBlockMasterClientPool, ufsManager, workerId)); + + mFileSystemMasterClient = createMockFileSystemMasterClient(); + + Sessions sessions = mock(Sessions.class); + + // set up a ufs directory for batch load jobs + mRootUfs = mTestFolder.newFolder("DefaultBlockWorkerTest").getAbsolutePath(); + mConfigurationRule.set(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS, mRootUfs); + ufsManager.addMount(UFS_LOAD_MOUNT_ID, + new AlluxioURI(mRootUfs), + UnderFileSystemConfiguration.defaults(Configuration.global())); + // Write an actual file to UFS + mTestLoadFilePath = mTestFolder.newFile("temp").getAbsolutePath(); + byte[] buffer = BufferUtils.getIncreasingByteArray((int) (BLOCK_SIZE * 1.5)); + BufferUtils.writeBufferToFile(mTestLoadFilePath, buffer); + + // set up ufs directory for fallback reading + mTestUfsFile = mTestFolder.newFile(); + // mount test file to UFS_MOUNT_ID + ufsManager.addMount( + UFS_MOUNT_ID, + new AlluxioURI(mTestUfsFile.getAbsolutePath()), + UnderFileSystemConfiguration.defaults(Configuration.global()) + ); + + mBlockWorker = new DefaultBlockWorker(mBlockMasterClientPool, mFileSystemMasterClient, + sessions, mBlockStore, workerId); + } + + protected void cacheBlock(boolean async) throws Exception { + // flush 1MB random data to ufs so that caching will take a while + long ufsBlockSize = 1024 * 1024; + byte[] data = new byte[(int) ufsBlockSize]; + mRandom.nextBytes(data); + + try (FileOutputStream fileOut = new FileOutputStream(mTestUfsFile); + BufferedOutputStream bufOut = new BufferedOutputStream(fileOut)) { + bufOut.write(data); + bufOut.flush(); + } + + // ufs options: delegate to the ufs mounted at UFS_MOUNT_ID + // with path to our test file + long blockId = mRandom.nextLong(); + Protocol.OpenUfsBlockOptions options = Protocol.OpenUfsBlockOptions + .newBuilder() + .setBlockSize(ufsBlockSize) + .setUfsPath(mTestUfsFile.getAbsolutePath()) + .setMountId(UFS_MOUNT_ID) + .setNoCache(false) + .setOffsetInFile(0) + .build(); + + // cache request: + // delegate to local ufs client rather than remote worker + CacheRequest request = CacheRequest + .newBuilder() + .setSourceHost(NetworkAddressUtils.getLocalHostName(500)) + .setBlockId(blockId) + .setLength(ufsBlockSize) + .setAsync(async) + .setOpenUfsBlockOptions(options) + .build(); + + mBlockWorker.cache(request); + + // check that the block metadata is 
present + if (async) { + assertFalse(mBlockWorker.getBlockStore().hasBlockMeta(blockId)); + waitFor( + "Wait for async cache", + () -> mBlockWorker.getBlockStore().hasBlockMeta(blockId), + WaitForOptions.defaults().setInterval(10).setTimeoutMs(2000)); + } else { + assertTrue(mBlockWorker.getBlockStore().hasBlockMeta(blockId)); + } + + long sessionId = mRandom.nextLong(); + // check that we can read the block locally + // note: this time we use an OpenUfsOption without ufsPath and blockInUfsTier so + // that the worker can't fall back to ufs read. + Protocol.OpenUfsBlockOptions noFallbackOptions = Protocol.OpenUfsBlockOptions.newBuilder() + .setBlockInUfsTier(false).build(); + try (BlockReader reader = mBlockWorker.createBlockReader( + sessionId, blockId, 0, false, noFallbackOptions)) { + ByteBuffer buf = reader.read(0, ufsBlockSize); + // alert: LocalFileBlockReader uses a MappedByteBuffer, which does not + // support the array operation. So we need to compare ByteBuffer manually + assertEquals(0, buf.compareTo(ByteBuffer.wrap(data))); + } + } + + // create a BlockMasterClient that simulates reasonable default + // interactions with the block master + protected BlockMasterClient createMockBlockMasterClient() throws Exception { + BlockMasterClient client = mock(BlockMasterClient.class); + + // return designated worker id + doReturn(WORKER_ID) + .when(client) + .getId(any(WorkerNetAddress.class)); + + // return Command.Nothing for heartbeat + doReturn(Command.newBuilder().setCommandType(CommandType.Nothing).build()) + .when(client) + .heartbeat( + anyLong(), + anyMap(), + anyMap(), + anyList(), + anyMap(), + anyMap(), + anyList() + ); + return client; + } + + // create a mocked FileSystemMasterClient that simulates reasonable default + // interactions with file system master + protected FileSystemMasterClient createMockFileSystemMasterClient() throws Exception { + FileSystemMasterClient client = mock(FileSystemMasterClient.class); + doReturn(ImmutableSet.of()) + .when(client) + .getPinList(); + return client; + } +} diff --git a/core/server/worker/src/test/java/alluxio/worker/block/MonoBlockStoreCommitBlockTest.java b/core/server/worker/src/test/java/alluxio/worker/block/MonoBlockStoreCommitBlockTest.java new file mode 100644 index 000000000000..96eec7641404 --- /dev/null +++ b/core/server/worker/src/test/java/alluxio/worker/block/MonoBlockStoreCommitBlockTest.java @@ -0,0 +1,157 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.worker.block; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyBoolean; +import static org.mockito.Mockito.anyLong; +import static org.mockito.Mockito.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import alluxio.exception.status.AlluxioStatusException; +import alluxio.underfs.UfsManager; +import alluxio.worker.block.io.BlockWriter; +import alluxio.worker.block.meta.StorageDir; + +import io.grpc.Status; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.concurrent.atomic.AtomicReference; + +// This test is structured a little differently from PagedBlockStoreCommitBlockTest because the +// two stores differ structurally. MonoBlockStore.commitBlock() first calls +// TieredBlockStore.commitBlock() to commit the block locally, then calls +// BlockMasterClient.commitBlock() to commit it on the master. +// TieredBlockStore.commitBlock() delegates to TieredBlockStore.commitBlockInternal(), which +// fires listener.onCommitBlockToLocal(). +// MonoBlockStore fires listener.onCommitBlockToMaster() only after +// BlockMasterClient.commitBlock() succeeds. +// In a nutshell, the two onCommit events are not fired from the same place. +public class MonoBlockStoreCommitBlockTest { + public MonoBlockStore mMonoBlockStore; + BlockMasterClientPool mMockedBlockMasterClientPool; + BlockMasterClient mMockedBlockMasterClient; + BlockMetadataManager mBlockMetadataManager; + BlockLockManager mBlockLockManager; + TieredBlockStore mTieredBlockStore; + private static final String FIRST_TIER_ALIAS = TieredBlockStoreTestUtils.TIER_ALIAS[0]; + private StorageDir mTestDir1; + /** Rule to create a new temporary folder during each test. */ + @Rule + public TemporaryFolder mTestFolder = new TemporaryFolder(); + + private static final Long SESSION_ID = 1L; + private static final long BLOCK_ID = 2L; + // Maybe the location should be asserted as well.
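+ // The listener below is spied so each test can verify which of the two commit events fired.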
+ BlockStoreEventListener mListener; + + @Before + public void setup() throws Exception { + File tempFolder = mTestFolder.newFolder(); + TieredBlockStoreTestUtils.setupDefaultConf(tempFolder.getAbsolutePath()); + + mMockedBlockMasterClientPool = mock(BlockMasterClientPool.class); + mMockedBlockMasterClient = mock(BlockMasterClient.class); + when(mMockedBlockMasterClientPool.acquire()).thenReturn(mMockedBlockMasterClient); + doNothing().when(mMockedBlockMasterClientPool).release(any()); + mBlockLockManager = new BlockLockManager(); + mBlockMetadataManager = BlockMetadataManager.createBlockMetadataManager(); + + mTestDir1 = mBlockMetadataManager.getTier(FIRST_TIER_ALIAS).getDir(0); + + mListener = spy(new AbstractBlockStoreEventListener() { + @Override + public void onCommitBlockToLocal(long blockId, BlockStoreLocation location) { + assertEquals(BLOCK_ID, blockId); + } + + @Override + public void onCommitBlockToMaster(long blockId, BlockStoreLocation location) { + assertEquals(BLOCK_ID, blockId); + } + }); + } + + @Test + public void commitLocalAndCommitMasterBothSuccess() throws Exception { + mTieredBlockStore = new TieredBlockStore(mBlockMetadataManager, mBlockLockManager); + + prepareBlockStore(); + + mMonoBlockStore.commitBlock(SESSION_ID, BLOCK_ID, false); + + verify(mListener).onCommitBlockToLocal(anyLong(), any(BlockStoreLocation.class)); + verify(mListener).onCommitBlockToMaster(anyLong(), any(BlockStoreLocation.class)); + } + + @Test + public void commitLocalSuccessAndCommitMasterFail() throws Exception { + doAnswer((i) -> { + throw new AlluxioStatusException(Status.UNAVAILABLE); + }).when(mMockedBlockMasterClient).commitBlock(anyLong(), anyLong(), anyString(), + anyString(), anyLong(), anyLong()); + mTieredBlockStore = new TieredBlockStore(mBlockMetadataManager, mBlockLockManager); + + prepareBlockStore(); + + assertThrows(RuntimeException.class, () -> { + mMonoBlockStore.commitBlock(SESSION_ID, BLOCK_ID, false); + }); + + verify(mListener).onCommitBlockToLocal(anyLong(), any(BlockStoreLocation.class)); + verify(mListener, never()).onCommitBlockToMaster(anyLong(), any(BlockStoreLocation.class)); + } + + @Test + public void commitLocalFailAndCommitMasterSuccess() throws Exception { + mTieredBlockStore = spy(new TieredBlockStore(mBlockMetadataManager, mBlockLockManager)); + doAnswer((i) -> { + throw new RuntimeException(); + }).when(mTieredBlockStore).commitBlockInternal(anyLong(), anyLong(), anyBoolean()); + + prepareBlockStore(); + + assertThrows(RuntimeException.class, () -> { + mMonoBlockStore.commitBlock(SESSION_ID, BLOCK_ID, false); + }); + + verify(mListener, never()).onCommitBlockToLocal(anyLong(), any(BlockStoreLocation.class)); + verify(mListener, never()).onCommitBlockToMaster(anyLong(), any(BlockStoreLocation.class)); + } + + public void prepareBlockStore() throws Exception { + mMonoBlockStore = new MonoBlockStore(mTieredBlockStore, mMockedBlockMasterClientPool, + mock(UfsManager.class), new AtomicReference<>(1L)); + + TieredBlockStoreTestUtils.createTempBlock(SESSION_ID, BLOCK_ID, 64, mTestDir1); + + byte[] data = new byte[64]; + Arrays.fill(data, (byte) 1); + ByteBuffer buf = ByteBuffer.wrap(data); + BlockWriter writer = mMonoBlockStore.createBlockWriter(SESSION_ID, BLOCK_ID); + writer.append(buf); + mMonoBlockStore.registerBlockStoreEventListener(mListener); + } +} diff --git a/core/server/worker/src/test/java/alluxio/worker/block/NoopBlockWorker.java b/core/server/worker/src/test/java/alluxio/worker/block/NoopBlockWorker.java index 6381349827d5..9abef428ec75 100644
--- a/core/server/worker/src/test/java/alluxio/worker/block/NoopBlockWorker.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/NoopBlockWorker.java @@ -162,6 +162,11 @@ public BlockStore getBlockStore() { throw new UnsupportedOperationException(); } + @Override + public WorkerNetAddress getWorkerAddress() { + throw new UnsupportedOperationException(); + } + @Override public Set> getDependencies() { return null; diff --git a/core/server/worker/src/test/java/alluxio/worker/block/PinListSyncTest.java b/core/server/worker/src/test/java/alluxio/worker/block/PinListSyncTest.java index 2e8b44920ef6..dae0717ffef1 100644 --- a/core/server/worker/src/test/java/alluxio/worker/block/PinListSyncTest.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/PinListSyncTest.java @@ -44,7 +44,7 @@ public Set getPinList() { }; PinListSync sync = new PinListSync(mBlockWorker, client); - sync.heartbeat(); + sync.heartbeat(Long.MAX_VALUE); // should receive the latest pin list assertEquals(testPinLists, mBlockWorker.getPinList()); @@ -62,7 +62,7 @@ public Set getPinList() throws IOException { PinListSync sync = new PinListSync(mBlockWorker, client); // should fail - sync.heartbeat(); + sync.heartbeat(Long.MAX_VALUE); // should not get any pin list update assertEquals(ImmutableSet.of(), mBlockWorker.getPinList()); diff --git a/core/server/worker/src/test/java/alluxio/worker/block/SpecificMasterBlockSyncTest.java b/core/server/worker/src/test/java/alluxio/worker/block/SpecificMasterBlockSyncTest.java new file mode 100644 index 000000000000..e88385f2ae56 --- /dev/null +++ b/core/server/worker/src/test/java/alluxio/worker/block/SpecificMasterBlockSyncTest.java @@ -0,0 +1,248 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.worker.block; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import alluxio.ClientContext; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.FailedToAcquireRegisterLeaseException; +import alluxio.grpc.Command; +import alluxio.grpc.CommandType; +import alluxio.grpc.ConfigProperty; +import alluxio.grpc.Metric; +import alluxio.master.MasterClientContext; +import alluxio.master.SingleMasterInquireClient; +import alluxio.retry.RetryPolicy; +import alluxio.wire.WorkerNetAddress; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.mockito.Mockito; + +import java.io.File; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +public class SpecificMasterBlockSyncTest { + @Rule + public TemporaryFolder mTestFolder = new TemporaryFolder(); + + @Test + public void heartbeatThread() throws Exception { + int heartbeatReportCapacityThreshold = 3; + Configuration.set(PropertyKey.WORKER_BLOCK_HEARTBEAT_REPORT_SIZE_THRESHOLD, + heartbeatReportCapacityThreshold); + BlockHeartbeatReporter blockHeartbeatReporter = new TestBlockHeartbeatReporter(); + + // Flaky registration succeeds every other time. + TestBlockMasterClient.INSTANCE.setFlakyRegistration(true); + TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(false); + + SpecificMasterBlockSync sync = new SpecificMasterBlockSync( + getMockedBlockWorker(), TestBlockMasterClient.INSTANCE, blockHeartbeatReporter + ); + assertFalse(sync.isRegistered()); + + // heartbeat registers the worker if it has not been registered. + sync.heartbeat(Long.MAX_VALUE); + assertTrue(sync.isRegistered()); + + // heartbeat returning register command resets the worker state. + Configuration.set(PropertyKey.WORKER_REGISTER_STREAM_ENABLED, true); + TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(true); + sync.heartbeat(Long.MAX_VALUE); + TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(false); + assertFalse(sync.isRegistered()); + + Configuration.set(PropertyKey.WORKER_REGISTER_STREAM_ENABLED, false); + TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(true); + sync.heartbeat(Long.MAX_VALUE); + TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(false); + assertFalse(sync.isRegistered()); + + // heartbeat registers the worker if it has not been registered. + sync.heartbeat(Long.MAX_VALUE); + assertTrue(sync.isRegistered()); + + // TestBlockHeartbeatReporter generates the report with one more removed block id each time. + // The heartbeat should retry 3 times before it succeeds because + // heartbeatReportCapacityThreshold is 3. + TestBlockMasterClient.INSTANCE.mHeartbeatCallCount = 0; + TestBlockMasterClient.INSTANCE.setHeartbeatError(true); + sync.heartbeat(Long.MAX_VALUE); + assertFalse(sync.isRegistered()); + assertEquals( + heartbeatReportCapacityThreshold, TestBlockMasterClient.INSTANCE.mHeartbeatCallCount); + + // registration should happen on the next heartbeat and the reporter should be cleared, + // except the newly generated ones. 
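+ // (TestBlockHeartbeatReporter adds one removed block each time a report is generated, so + // exactly one block change is expected in the assertion below.)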
+ TestBlockMasterClient.INSTANCE.setHeartbeatError(false); + sync.heartbeat(Long.MAX_VALUE); + assertTrue(sync.isRegistered()); + assertEquals(1, blockHeartbeatReporter.generateReportAndClear().getBlockChangeCount()); + + assertTrue(TestBlockMasterClient.INSTANCE.mRegisterCalled); + assertTrue(TestBlockMasterClient.INSTANCE.mRegisterWithStreamCalled); + } + + private static class TestBlockHeartbeatReporter extends BlockHeartbeatReporter { + AtomicInteger mId = new AtomicInteger(0); + + @Override + public BlockHeartbeatReport generateReportAndClear() { + // On generation, add one block each time. + onRemoveBlockByWorker(mId.incrementAndGet()); + return super.generateReportAndClear(); + } + } + + private static class TestBlockMasterClient extends BlockMasterClient { + public static final TestBlockMasterClient INSTANCE = new TestBlockMasterClient(); + + private boolean mLastRegisterSuccess = true; + private boolean mFlakyRegistration = false; + private boolean mReturnRegisterCommand = false; + private boolean mHeartbeatFailed = false; + + private boolean mRegisterCalled = false; + + private boolean mRegisterWithStreamCalled = false; + private int mHeartbeatCallCount = 0; + + public void setFlakyRegistration(boolean value) { + mFlakyRegistration = value; + } + + public void setReturnRegisterCommand(boolean value) { + mReturnRegisterCommand = value; + } + + public void setHeartbeatError(boolean value) { + mHeartbeatFailed = value; + } + + public TestBlockMasterClient() { + super(MasterClientContext + .newBuilder(ClientContext.create(Configuration.global())) + .setMasterInquireClient(new SingleMasterInquireClient( + InetSocketAddress.createUnresolved("localhost", 0))).build()); + } + + @Override + public void register( + long workerId, List storageTierAliases, + Map totalBytesOnTiers, Map usedBytesOnTiers, + Map> currentBlocksOnLocation, + Map> lostStorage, List configList) + throws IOException { + if (!mFlakyRegistration) { + return; + } + if (mLastRegisterSuccess) { + mLastRegisterSuccess = false; + throw new IOException("Registration failed"); + } else { + mLastRegisterSuccess = true; + mRegisterCalled = true; + } + } + + @Override + public void registerWithStream( + long workerId, List storageTierAliases, + Map totalBytesOnTiers, + Map usedBytesOnTiers, + Map> currentBlocksOnLocation, + Map> lostStorage, + List configList) throws IOException { + if (!mFlakyRegistration) { + return; + } + if (mLastRegisterSuccess) { + mLastRegisterSuccess = false; + throw new IOException("Registration failed"); + } else { + mLastRegisterSuccess = true; + mRegisterWithStreamCalled = true; + } + } + + @Override + public synchronized Command heartbeat( + long workerId, Map capacityBytesOnTiers, + Map usedBytesOnTiers, + List removedBlocks, + Map> addedBlocks, + Map> lostStorage, + List metrics) throws IOException { + mHeartbeatCallCount++; + if (mHeartbeatFailed) { + throw new IOException("heartbeat failed"); + } + if (mReturnRegisterCommand) { + return Command.newBuilder().setCommandType(CommandType.Register).build(); + } + return Command.newBuilder().setCommandType(CommandType.Nothing).build(); + } + + @Override + public void acquireRegisterLeaseWithBackoff( + long workerId, int estimatedBlockCount, RetryPolicy retry) + throws IOException, FailedToAcquireRegisterLeaseException { + } + + @Override + public void notifyWorkerId(long workerId, WorkerNetAddress address) throws IOException { + } + } + + public BlockMasterClientPool mClientPool = new BlockMasterClientPool() { + @Override + public BlockMasterClient 
acquire() { + return TestBlockMasterClient.INSTANCE; + } + + @Override + public void release(BlockMasterClient resource) { + } + }; + + private BlockWorker getMockedBlockWorker() throws Exception { + File tempFolder = mTestFolder.newFolder(); + BlockMetadataManager metadataManager = + TieredBlockStoreTestUtils.defaultMetadataManager(tempFolder.getAbsolutePath()); + + BlockWorker blockWorker = Mockito.mock(BlockWorker.class); + Mockito.when(blockWorker.getStoreMetaFull()) + .thenReturn(metadataManager.getBlockStoreMetaFull()); + Mockito.when(blockWorker.getStoreMeta()) + .thenReturn(metadataManager.getBlockStoreMetaFull()); + Mockito.when(blockWorker.getReport()) + .thenReturn(new BlockHeartbeatReport(Collections.emptyMap(), + Collections.emptyList(), Collections.emptyMap())); + Mockito.when(blockWorker.getWorkerAddress()) + .thenReturn(new WorkerNetAddress()); + Mockito.when(blockWorker.getWorkerId()) + .thenReturn(new AtomicReference<>(0L)); + return blockWorker; + } +} diff --git a/core/server/worker/src/test/java/alluxio/worker/block/TieredBlockStoreTestUtils.java b/core/server/worker/src/test/java/alluxio/worker/block/TieredBlockStoreTestUtils.java index 15ef31e70f3b..ff8b8939742b 100644 --- a/core/server/worker/src/test/java/alluxio/worker/block/TieredBlockStoreTestUtils.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/TieredBlockStoreTestUtils.java @@ -335,7 +335,7 @@ public static void cache2(long sessionId, long blockId, long bytes, StorageDir d cache2(sessionId, blockId, bytes, dir, meta, (BlockStoreEventListener) null); if (iterator != null) { for (BlockStoreEventListener listener : iterator.getListeners()) { - listener.onCommitBlock(blockId, dir.toBlockStoreLocation()); + listener.onCommitBlockToLocal(blockId, dir.toBlockStoreLocation()); } } } @@ -360,7 +360,7 @@ public static void cache2(long sessionId, long blockId, long bytes, StorageDir d // update iterator if a listener. if (listener != null) { - listener.onCommitBlock(blockId, dir.toBlockStoreLocation()); + listener.onCommitBlockToLocal(blockId, dir.toBlockStoreLocation()); } } diff --git a/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockReaderTest.java b/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockReaderTest.java index 19d1fd222652..d8a938078b71 100644 --- a/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockReaderTest.java +++ b/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockReaderTest.java @@ -239,7 +239,7 @@ public void sequentialTransferMultipleTimes() throws Exception { } private static UfsBlockReadOptions createUfsBlockOptions(String ufsPath) { - return new UfsBlockReadOptions(MOUNT_ID, OFFSET_IN_FILE, ufsPath, true); + return new UfsBlockReadOptions(MOUNT_ID, OFFSET_IN_FILE, ufsPath, true, null); } private static void createTempUfsBlock(Path destPath, long blockSize) throws Exception { diff --git a/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockStoreCommitBlockTest.java b/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockStoreCommitBlockTest.java new file mode 100644 index 000000000000..531979303d0f --- /dev/null +++ b/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockStoreCommitBlockTest.java @@ -0,0 +1,235 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.page; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyLong; +import static org.mockito.Mockito.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import alluxio.Constants; +import alluxio.client.file.cache.CacheManager; +import alluxio.client.file.cache.CacheManagerOptions; +import alluxio.client.file.cache.evictor.CacheEvictorOptions; +import alluxio.client.file.cache.evictor.FIFOCacheEvictor; +import alluxio.client.file.cache.store.PageStoreDir; +import alluxio.client.file.cache.store.PageStoreOptions; +import alluxio.client.file.cache.store.PageStoreType; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.Configuration; +import alluxio.conf.InstancedConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.exception.status.AlluxioStatusException; +import alluxio.master.NoopUfsManager; +import alluxio.underfs.UfsManager; +import alluxio.util.CommonUtils; +import alluxio.worker.block.AbstractBlockStoreEventListener; +import alluxio.worker.block.BlockMasterClient; +import alluxio.worker.block.BlockMasterClientPool; +import alluxio.worker.block.BlockStoreEventListener; +import alluxio.worker.block.BlockStoreLocation; +import alluxio.worker.block.CreateBlockOptions; +import alluxio.worker.block.io.BlockWriter; + +import com.google.common.collect.ImmutableList; +import io.grpc.Status; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicReference; + +public class PagedBlockStoreCommitBlockTest { + BlockStoreEventListener mListener; + UfsManager mUfs; + AlluxioConfiguration mConf; + CacheManagerOptions mCacheManagerOptions; + PagedBlockMetaStore mPageMetaStore; + List mDirs; + PagedBlockStore mPagedBlockStore; + BlockMasterClientPool mBlockMasterClientPool; + BlockMasterClient mMockedBlockMasterClient; + AtomicReference mWorkerId; + + CacheManager mCacheManager; + + private static final int DIR_INDEX = 0; + + private static final Long SESSION_ID = 1L; + private static final long BLOCK_ID = 2L; + final int mBlockSize = 64; + + public int mPageSize = 2; + + private static final int OFFSET = 0; + + @Rule + public TemporaryFolder mTempFolder = new TemporaryFolder(); + + @Before + public void setup() throws Exception { + List pageStoreDirs; + InstancedConfiguration cacheManagerConf = Configuration.copyGlobal(); + + Path dirPath = mTempFolder.newFolder().toPath(); + InstancedConfiguration dirConf = Configuration.modifiableGlobal(); + 
dirConf.set(PropertyKey.WORKER_PAGE_STORE_DIRS, ImmutableList.of(dirPath)); + dirConf.set(PropertyKey.WORKER_PAGE_STORE_SIZES, ImmutableList.of(Constants.MB)); + dirConf.set(PropertyKey.WORKER_PAGE_STORE_TYPE, PageStoreType.LOCAL); + PageStoreDir pageStoreDir = + PageStoreDir.createPageStoreDir( + new CacheEvictorOptions().setEvictorClass(FIFOCacheEvictor.class), + PageStoreOptions.createForWorkerPageStore(dirConf).get(DIR_INDEX)); + + mUfs = new NoopUfsManager(); + mConf = Configuration.global(); + cacheManagerConf.set(PropertyKey.WORKER_PAGE_STORE_PAGE_SIZE, mPageSize); + cacheManagerConf.set(PropertyKey.WORKER_PAGE_STORE_DIRS, ImmutableList.of(dirPath)); + + // Mock BlockMasterClientPool and BlockMasterClient rather than overriding them: + // the mocked pool returns a mocked BlockMasterClient when acquire() is called + // and does nothing on release(); more behavior may be added later on. + mBlockMasterClientPool = mock(BlockMasterClientPool.class); + mMockedBlockMasterClient = mock(BlockMasterClient.class); + when(mBlockMasterClientPool.acquire()).thenReturn(mMockedBlockMasterClient); + doNothing().when(mBlockMasterClientPool).release(any()); + mWorkerId = new AtomicReference<>(-1L); + mCacheManagerOptions = CacheManagerOptions.createForWorker(cacheManagerConf); + pageStoreDirs = new ArrayList<>(); + pageStoreDirs.add(pageStoreDir); + mDirs = PagedBlockStoreDir.fromPageStoreDirs(pageStoreDirs); + + mListener = spy(new AbstractBlockStoreEventListener() { + @Override + public void onCommitBlockToLocal(long blockId, BlockStoreLocation location) { + assertEquals(BLOCK_ID, blockId); + } + + @Override + public void onCommitBlockToMaster(long blockId, BlockStoreLocation location) { + assertEquals(BLOCK_ID, blockId); + } + }); + } + + @After + public void tearDown() throws IOException { + mPagedBlockStore.close(); + } + + // In this test both commits succeed: no exception should be thrown + // and both onCommit methods should be called. + @Test + public void localCommitAndMasterCommitBothSuccess() + throws IOException, InterruptedException, TimeoutException { + mPageMetaStore = new PagedBlockMetaStore(mDirs); + mCacheManager = CacheManager.Factory.create(mConf, mCacheManagerOptions, mPageMetaStore); + + mPagedBlockStore = new PagedBlockStore(mCacheManager, mUfs, mBlockMasterClientPool, mWorkerId, + mPageMetaStore, mCacheManagerOptions.getPageSize()); + + prepareBlockStore(); + + mPagedBlockStore.commitBlock(SESSION_ID, BLOCK_ID, false); + verify(mListener).onCommitBlockToLocal(anyLong(), any(BlockStoreLocation.class)); + verify(mListener).onCommitBlockToMaster(anyLong(), any(BlockStoreLocation.class)); + } + + // In this test the local commit fails, so one exception is expected + // and neither onCommit method should be called. + @Test + public void localCommitFailAndMasterCommitSuccess() + throws IOException, InterruptedException, TimeoutException { + mPageMetaStore = new PagedBlockMetaStore(mDirs) { + // here commit always throws an exception + @Override + public PagedBlockMeta commit(long BLOCK_ID) { + throw new RuntimeException(); + } + }; + mCacheManager = CacheManager.Factory.create(mConf, mCacheManagerOptions, mPageMetaStore); + + mPagedBlockStore = new PagedBlockStore(mCacheManager, mUfs, mBlockMasterClientPool, + mWorkerId, mPageMetaStore, mCacheManagerOptions.getPageSize()); + + prepareBlockStore(); + + assertThrows(RuntimeException.class, () -> { + mPagedBlockStore.commitBlock(SESSION_ID, BLOCK_ID, false); + }); + + verify(mListener, never()).onCommitBlockToLocal(anyLong(), any(BlockStoreLocation.class)); + 
verify(mListener, never()).onCommitBlockToMaster(anyLong(), any(BlockStoreLocation.class)); + } + + // In this test only the local commit succeeds (the master commit fails), so one exception + // is expected and only onCommitBlockToLocal should be called. + @Test + public void localCommitSuccessAndMasterCommitFail() + throws IOException, InterruptedException, TimeoutException { + doAnswer((i) -> { + throw new AlluxioStatusException(Status.UNAVAILABLE); + }).when(mMockedBlockMasterClient).commitBlock(anyLong(), anyLong(), anyString(), + anyString(), anyLong(), anyLong()); + mPageMetaStore = new PagedBlockMetaStore(mDirs); + mCacheManager = CacheManager.Factory.create(mConf, mCacheManagerOptions, mPageMetaStore); + + mPagedBlockStore = new PagedBlockStore(mCacheManager, mUfs, mBlockMasterClientPool, mWorkerId, + mPageMetaStore, mCacheManagerOptions.getPageSize()); + + prepareBlockStore(); + + assertThrows(RuntimeException.class, () -> { + mPagedBlockStore.commitBlock(SESSION_ID, BLOCK_ID, false); + }); + verify(mListener).onCommitBlockToLocal(anyLong(), any(BlockStoreLocation.class)); + verify(mListener, never()).onCommitBlockToMaster(anyLong(), any(BlockStoreLocation.class)); + } + + // Prepare the PagedBlockStore and create a temp block for the following tests + public void prepareBlockStore() throws IOException, InterruptedException, TimeoutException { + PagedBlockStoreDir dir = + (PagedBlockStoreDir) mPageMetaStore.allocate(BlockPageId.tempFileIdOf(BLOCK_ID), 1); + + dir.putTempFile(BlockPageId.tempFileIdOf(BLOCK_ID)); + PagedTempBlockMeta blockMeta = new PagedTempBlockMeta(BLOCK_ID, dir); + mPagedBlockStore.createBlock(SESSION_ID, BLOCK_ID, OFFSET, + new CreateBlockOptions(null, null, mBlockSize)); + byte[] data = new byte[mBlockSize]; + Arrays.fill(data, (byte) 1); + ByteBuffer buf = ByteBuffer.wrap(data); + BlockWriter writer = mPagedBlockStore.createBlockWriter(SESSION_ID, BLOCK_ID); + CommonUtils.waitFor("writer initiation complete", + () -> mPagedBlockStore.getCacheManagerState() == CacheManager.State.READ_WRITE); + writer.append(buf); + + mPagedBlockStore.registerBlockStoreEventListener(mListener); + } +} diff --git a/core/transport/src/main/proto/grpc/block_master.proto b/core/transport/src/main/proto/grpc/block_master.proto index 1740e49a834b..a49801092b76 100644 --- a/core/transport/src/main/proto/grpc/block_master.proto +++ b/core/transport/src/main/proto/grpc/block_master.proto @@ -69,6 +69,7 @@ message WorkerInfo { map<string, int64> capacityBytesOnTiers = 8; map<string, int64> usedBytesOnTiers = 9; optional BuildVersion buildVersion = 10; + optional int32 numVCpu = 11; } enum WorkerRange { @@ -91,6 +92,7 @@ enum WorkerInfoField { WORKER_USED_BYTES_ON_TIERS = 9; BLOCK_COUNT = 10; BUILD_VERSION = 11; + NUM_VCPU = 12; } message GetWorkerReportPOptions { @@ -109,16 +111,24 @@ message WorkerLostStorageInfo { /** a map from tier alias to the lost storage paths */ map<string, StorageList> lostStorage = 2; } -message RemoveDecommissionedWorkerPOptions { - optional string workerName = 1; +message RemoveDisabledWorkerPOptions { + required string workerHostname = 1; + optional int64 workerWebPort = 2; } -message RemoveDecommissionedWorkerPResponse {} +message RemoveDisabledWorkerPResponse {} message GetWorkerLostStoragePOptions {} message GetWorkerLostStoragePResponse { repeated WorkerLostStorageInfo workerLostStorageInfo = 1; } +message DecommissionWorkerPResponse {} +message DecommissionWorkerPOptions { + required string workerHostname = 1; + optional int64 workerWebPort = 2; + optional bool canRegisterAgain = 3; +} + /** * This interface contains block master service endpoints
for Alluxio clients. */ @@ -153,8 +163,8 @@ service BlockMasterClientService { * If target worker is in the decommissioned worker set, * return true, remove target worker from decommissioned worker set; else, return false. */ - rpc RemoveDecommissionedWorker(RemoveDecommissionedWorkerPOptions) - returns (RemoveDecommissionedWorkerPResponse); + rpc RemoveDisabledWorker(RemoveDisabledWorkerPOptions) + returns (RemoveDisabledWorkerPResponse); /** * Returns a list of workers information for report CLI. @@ -165,6 +175,11 @@ service BlockMasterClientService { * Returns a list of worker lost storage information */ rpc GetWorkerLostStorage(GetWorkerLostStoragePOptions) returns (GetWorkerLostStoragePResponse); + + /** + * Decommission the specified worker from Alluxio. + */ + rpc DecommissionWorker(DecommissionWorkerPOptions) returns (DecommissionWorkerPResponse); } message TierList { @@ -240,6 +255,16 @@ message CommitBlockInUfsPRequest { message CommitBlockInUfsPOptions {} message CommitBlockInUfsPResponse {} +message NotifyWorkerIdPOptions {} +message NotifyWorkerIdPRequest { + optional int64 workerId = 1; + /** the worker network address */ + optional grpc.WorkerNetAddress workerNetAddress = 2; + optional NotifyWorkerIdPOptions options = 3; +} +message NotifyWorkerIdPResponse { +} + message GetWorkerIdPOptions {} message GetWorkerIdPRequest { /** the worker network address */ @@ -263,16 +288,11 @@ message GetRegisterLeasePResponse { optional GetRegisterLeasePOptions options = 4; } -message BuildVersion { - /** the project version of the worker */ - optional string version = 1; - /** the git revision at the time of building the worker */ - optional string revision = 2; -} message RegisterWorkerPOptions { repeated grpc.ConfigProperty configs = 1; /** the worker version to display in info pages (useful for rolling upgrades) */ optional BuildVersion buildVersion = 2; + optional int32 numVCpu = 3; } message RegisterWorkerPRequest { /** the id of the worker */ @@ -318,6 +338,11 @@ service BlockMasterWorkerService { */ rpc GetWorkerId(GetWorkerIdPRequest) returns (GetWorkerIdPResponse); + /** + * Notify all masters about the worker ID. + */ + rpc NotifyWorkerId(NotifyWorkerIdPRequest) returns (NotifyWorkerIdPResponse); + /** * Registers a worker. */ diff --git a/core/transport/src/main/proto/grpc/block_worker.proto b/core/transport/src/main/proto/grpc/block_worker.proto index 3f5d2a5d6667..a7591f99deb3 100644 --- a/core/transport/src/main/proto/grpc/block_worker.proto +++ b/core/transport/src/main/proto/grpc/block_worker.proto @@ -105,10 +105,11 @@ message WriteRequest { } // The write response. -// next available id: 2 +// next available id: 3 message WriteResponse { optional int64 offset = 1; // Errors will be handled by standard gRPC stream APIs. + optional string contentHash = 2; } // Request for caching a block asynchronously @@ -149,6 +150,7 @@ message UfsReadOptions{ // We introduce a heuristic to choose which API to use. required bool position_short = 2; optional int64 bandwidth = 3; + optional string user = 4; } diff --git a/core/transport/src/main/proto/grpc/common.proto b/core/transport/src/main/proto/grpc/common.proto index de020d5063ae..e89c4c869738 100644 --- a/core/transport/src/main/proto/grpc/common.proto +++ b/core/transport/src/main/proto/grpc/common.proto @@ -65,6 +65,8 @@ enum MetricType { // METER represents a metric value at a _rate_.
The value of the metric varies with the time over which events are // recorded METER = 2; + // HISTOGRAM gives statistics about the value of past occurrences of an event. + HISTOGRAM = 5; // TIMER represents a histogram of the rate of the specified events. TIMER = 3; // EXECUTOR_SERVICE represents an executor service. @@ -78,6 +80,8 @@ enum CommandType { Free = 3; // Ask the worker to free files. Delete = 4; // Ask the worker to delete files. Persist = 5; // Ask the worker to persist a file for lineage + Decommissioned = 6; // Notify the worker that it has been decommissioned + Disabled = 7; // Notify the worker that it has been disabled } message ConfigProperty { @@ -89,6 +93,7 @@ enum TtlAction { DELETE = 0; // Delete the file after TTL expires. FREE = 1; // Free the file after TTL expires. + DELETE_ALLUXIO = 2; // Delete the data and metadata in Alluxio after TTL expires. } message Command { @@ -144,3 +149,10 @@ enum ErrorType { Internal = 1; External = 2; } + +message BuildVersion { + /** the project version */ + optional string version = 1; + /** the git revision at the time of building */ + optional string revision = 2; +} diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index 211adc60a34f..b0a9c56ce2a5 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -82,6 +82,7 @@ message CompleteFilePOptions { optional int64 ufsLength = 1; optional ScheduleAsyncPersistencePOptions asyncPersistOptions = 2; optional FileSystemMasterCommonPOptions commonOptions = 3; + optional string contentHash = 4; } message CompleteFilePRequest { /** the path of the file */ @@ -94,6 +95,11 @@ message OpenFilePOptions { optional int32 maxUfsReadConcurrency = 2; optional FileSystemMasterCommonPOptions commonOptions = 3; optional bool updateLastAccessTime = 4 [default = true]; + // If specified and the blocks are not cached in any worker, + // the data will be read and cached on the specified worker. + // If the blocks have already been cached in some Alluxio workers, + // this field will be ignored.
+ optional grpc.WorkerNetAddress ufsReadWorkerLocation = 15; } // XAttrPropagationStrategy controls the behaviour for assigning xAttr @@ -114,6 +120,7 @@ message CreateDirectoryPOptions { optional FileSystemMasterCommonPOptions commonOptions = 5; map xattr = 6; optional XAttrPropagationStrategy xattrPropStrat = 7 [default = NEW_PATHS]; + optional bool checkS3BucketPath = 8; } message CreateDirectoryPRequest { /** the path of the directory */ @@ -137,6 +144,10 @@ message CreateFilePOptions { optional int64 persistenceWaitTime = 10; map xattr = 11; optional XAttrPropagationStrategy xattrPropStrat = 12 [default = NEW_PATHS]; + optional bool overwrite = 13; + optional bool checkS3BucketPath = 14; + // If specified, the data will be written to the specified worker + optional grpc.WorkerNetAddress workerLocation = 15; } message CreateFilePRequest { /** the path of the file */ @@ -150,6 +161,7 @@ message DeletePOptions { optional bool alluxioOnly = 2; optional bool unchecked = 3; optional FileSystemMasterCommonPOptions commonOptions = 4; + optional bool syncParentNextTime = 5; optional bool deleteMountPoint = 6; } message DeletePRequest { @@ -190,6 +202,7 @@ message GetStatusPOptions { optional FileSystemMasterCommonPOptions commonOptions = 2; optional Bits accessMode = 3; optional bool updateTimestamps = 4 [default = true]; + optional bool includeRealContentHash = 5; } message GetStatusPRequest { /** the path of the file or directory */ @@ -229,6 +242,17 @@ message ListStatusPOptions { optional bool recursive = 4; // No data will be transferred. optional bool loadMetadataOnly = 5; + // Setting this to true will disable checking during metadata sync to see if the children + // of a directory have been loaded. This will avoid a costly full traversal of the file + // system during recursive listings, but may result in the children of directories not + // being loaded. It is recommended to set this to true after the first call of a + // recursive partial listing. + optional bool disableAreDescendantsLoadedCheck = 6; + // Mount info will be excluded from the list status response if this field is set to true. + // Resolving a path and obtaining the mount info is an expensive operation. + // For clients that do not need this information such as hadoop-compatible clients, + // excluding mount info improves the endpoint performance. + optional bool excludeMountInfo = 7; } message ListStatusPRequest { /** the path of the file or directory */ @@ -255,7 +279,7 @@ message ListStatusPartialPOptions { // the ListStatusPartialPRequest is a prefix of startAfter (e.g. if listing "/dir", // then startAfter could be "/dir/next"). Otherwise if start after does not start with "/", // then startAfter is appended to the path given in the ListStatusPartialPRequest - // (e.g. if the listing path is "/dir" and startAfter is "/after" then files that + // (e.g. if the listing path is "/dir" and startAfter is "after" then files that // start after "/dir/after" in lexicographic order will be listed). // The full path itself does not need to exist. // This offset type is recommended to use if POSIX compatible listings are needed.
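The startAfter semantics documented above can be illustrated from the client side with the protobuf-generated Java builders. This is a minimal sketch, not part of this patch: the generated classes, setters, and the options field on the request are assumed from the message definitions above, and the paths are illustrative only.

    // Resume a partial listing of "/dir" after the entry "/dir/after".
    // "after" does not start with "/", so it is appended to the listed path:
    // entries after "/dir/after" in lexicographic order will be returned.
    ListStatusPartialPRequest request = ListStatusPartialPRequest.newBuilder()
        .setPath("/dir")
        .setOptions(ListStatusPartialPOptions.newBuilder()
            .setStartAfter("after"))
        .build();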
@@ -387,6 +411,7 @@ message MountPOptions { map properties = 2; optional bool shared = 3; optional FileSystemMasterCommonPOptions commonOptions = 4; + optional bool remount = 5; } message MountPRequest { /** the path of alluxio mount point */ @@ -473,6 +498,7 @@ message SetAttributePOptions { repeated string pinnedMedia = 10; map xattr = 11; optional alluxio.proto.journal.XAttrUpdateStrategy xattrUpdateStrategy = 12; + optional bool directChildrenLoaded = 13; } message SetAttributePRequest { /** the path of the file */ @@ -585,6 +611,125 @@ message NeedsSyncRequest { message NeedsSyncResponse {} +message SubmitJobPRequest { + optional bytes request_body = 1; +} + +message SubmitJobPResponse { + optional string jobId = 1; +} + +message LoadJobPOptions { + optional int64 bandwidth = 1; + optional bool verify = 2; + optional bool partialListing = 3; +} + +message CopyJobPOptions { + optional int64 bandwidth = 1; + optional bool verify = 2; + optional bool partialListing = 3; + optional bool overwrite = 4; +} + +message StopJobPRequest { + required JobDescription jobDescription = 1; +} + +message StopJobPResponse { + optional bool jobStopped = 1; +} + +enum JobProgressReportFormat { + TEXT = 1; + JSON = 2; +} + +message JobDescription { + required string type = 1; + optional string path = 2; +} + +message JobProgressPOptions { + optional JobProgressReportFormat format = 1; + optional bool verbose = 2; +} + +message GetJobProgressPRequest { + required JobDescription jobDescription = 1; + optional JobProgressPOptions options = 2; +} + +message GetJobProgressPResponse { + optional string progressReport = 1; + optional JobProgressReportFormat format = 2; +} + +message SyncMetadataPOptions { + optional fscommon.LoadDescendantPType loadDescendantType = 1; + optional fscommon.DirectoryLoadPType directoryLoadType = 2; +} + +message SyncMetadataPRequest { + required string path = 1; + optional SyncMetadataPOptions options = 2; +} + +enum SyncMetadataState { + UNKNOWN = 0; + RUNNING = 1; + SUCCEEDED = 2; + FAILED = 3; + CANCELED = 4; +} + +message SyncMetadataTask { + message Exception { + optional string exceptionType = 1; + optional string exceptionMessage = 2; + optional string stacktrace = 3; + } + + optional int64 id = 1; + optional SyncMetadataState state = 2; + optional int64 syncDurationMs = 3; + optional Exception exception = 4; + optional int64 successOpCount = 5; + + optional string taskInfoString = 100; + optional string taskStatString = 101; + +} + +message SyncMetadataPResponse { + repeated SyncMetadataTask task = 1; + + optional string debugInfo = 1000; +} + +message SyncMetadataAsyncPResponse { + optional bool submitted = 1; + optional int64 taskGroupId = 2; + repeated int64 taskIds = 3; +} + +message GetSyncProgressPRequest { + optional int64 taskGroupId = 1; +} + +message GetSyncProgressPResponse { + repeated SyncMetadataTask task = 1; + + optional string debugInfo = 1000; +} + +message CancelSyncMetadataPRequest { + optional int64 taskGroupId = 1; +} + +message CancelSyncMetadataPResponse { + optional bool success = 1; +} /** * This interface contains file system master service endpoints for Alluxio clients. @@ -731,6 +876,23 @@ service FileSystemMasterClientService { rpc GetStateLockHolders(GetStateLockHoldersPRequest) returns (GetStateLockHoldersPResponse); rpc NeedsSync(NeedsSyncRequest) returns (NeedsSyncResponse); + + /** + * Submits a job, such as a load or copy job, to the master.
+ */ + rpc submitJob(SubmitJobPRequest) returns (SubmitJobPResponse); + + rpc StopJob(StopJobPRequest) returns (StopJobPResponse); + + rpc GetJobProgress(GetJobProgressPRequest) returns (GetJobProgressPResponse); + + /** + * Load metadata from the UFS into Alluxio. + */ + rpc SyncMetadata(SyncMetadataPRequest) returns (SyncMetadataPResponse); + rpc SyncMetadataAsync(SyncMetadataPRequest) returns (SyncMetadataAsyncPResponse); + rpc GetSyncProgress(GetSyncProgressPRequest) returns (GetSyncProgressPResponse); + rpc CancelSyncMetadata(CancelSyncMetadataPRequest) returns (CancelSyncMetadataPResponse); } message FileSystemHeartbeatPResponse { diff --git a/core/transport/src/main/proto/grpc/fscommon.proto b/core/transport/src/main/proto/grpc/fscommon.proto index 458b5e72df1d..d9d8510242b4 100644 --- a/core/transport/src/main/proto/grpc/fscommon.proto +++ b/core/transport/src/main/proto/grpc/fscommon.proto @@ -11,3 +11,9 @@ enum LoadDescendantPType { ONE = 1; ALL = 2; } + +enum DirectoryLoadPType { + SINGLE_LISTING = 0; + BFS = 1; + DFS = 2; +} diff --git a/core/transport/src/main/proto/grpc/job_master.proto b/core/transport/src/main/proto/grpc/job_master.proto index ad5f553cf7de..7f2ee9be8182 100644 --- a/core/transport/src/main/proto/grpc/job_master.proto +++ b/core/transport/src/main/proto/grpc/job_master.proto @@ -107,6 +107,7 @@ message JobWorkerHealth { optional int32 taskPoolSize = 5; optional int32 numActiveTasks = 6; optional int32 unfinishedTasks = 7; + optional grpc.BuildVersion version = 8; } message JobCommand { @@ -197,6 +198,21 @@ message GetAllWorkerHealthPResponse { repeated JobWorkerHealth workerHealths = 1; } +message JobMasterStatus { + optional string state = 1; + optional grpc.NetAddress masterAddress = 2; + optional int64 startTime = 3; + optional grpc.BuildVersion version = 4; +} + +message GetAllMasterStatusPOptions {} +message GetAllMasterStatusPRequest { + optional GetAllMasterStatusPOptions options = 1; +} +message GetAllMasterStatusPResponse { + repeated JobMasterStatus jobMasterStatus = 1; +} + message SubmitOptions {} message SubmitRequest { optional bytes cmdConfig = 1; @@ -275,6 +291,11 @@ service JobMasterClientService { */ rpc GetAllWorkerHealth(GetAllWorkerHealthPRequest) returns (GetAllWorkerHealthPResponse); + /** + * Lists the status of all job masters. + */ + rpc GetAllMasterStatus(GetAllMasterStatusPRequest) returns (GetAllMasterStatusPResponse); + /** * Submit a CMD job, return a jobControlId. */ @@ -305,6 +326,7 @@ message RegisterJobWorkerPOptions {} message RegisterJobWorkerPRequest { optional grpc.WorkerNetAddress workerNetAddress = 1; optional RegisterJobWorkerPOptions options = 2; + optional grpc.BuildVersion version = 3; } message RegisterJobWorkerPResponse { optional int64 id = 1; @@ -325,3 +347,62 @@ service JobMasterWorkerService { */ rpc RegisterJobWorker(RegisterJobWorkerPRequest) returns (RegisterJobWorkerPResponse); } + +message GetJobMasterIdPOptions {} +message GetJobMasterIdPRequest { + optional grpc.NetAddress masterAddress = 1; + optional GetJobMasterIdPOptions options = 2; +} +message GetJobMasterIdPResponse { + optional int64 masterId = 1; +} + +enum JobMasterMetaCommand { + MetaCommand_Unknown = 0; + MetaCommand_Nothing = 1; + MetaCommand_Register = 2; // Ask the standby master to re-register.
+} + +message RegisterJobMasterPOptions { + optional int64 startTimeMs = 2; + optional int64 losePrimacyTimeMs = 3; + optional grpc.BuildVersion version = 4; +} + +message RegisterJobMasterPRequest { + optional int64 jobMasterId = 1; + optional RegisterJobMasterPOptions options = 2; +} +message RegisterJobMasterPResponse {} + +message JobMasterHeartbeatPOptions { +} +message JobMasterHeartbeatPRequest { + optional int64 masterId = 1; + optional JobMasterHeartbeatPOptions options = 2; +} +message JobMasterHeartbeatPResponse { + optional JobMasterMetaCommand command = 1; +} + +/** + * This interface contains job master service endpoints for Alluxio standby job masters. + */ +service JobMasterMasterService { + + /** + * Returns a master id for the given master address. + */ + rpc GetMasterId(GetJobMasterIdPRequest) returns (GetJobMasterIdPResponse); + + /** + * Registers a master. + */ + rpc RegisterMaster(RegisterJobMasterPRequest) returns (RegisterJobMasterPResponse); + + /** + * Heartbeats to indicate whether the master is lost or not. + */ + rpc MasterHeartbeat(JobMasterHeartbeatPRequest) returns (JobMasterHeartbeatPResponse); +} + diff --git a/core/transport/src/main/proto/grpc/meta_master.proto b/core/transport/src/main/proto/grpc/meta_master.proto index c21022e66378..f896b34f6d5e 100644 --- a/core/transport/src/main/proto/grpc/meta_master.proto +++ b/core/transport/src/main/proto/grpc/meta_master.proto @@ -22,6 +22,8 @@ message GetConfigurationPResponse{ map pathConfigs = 2; optional string clusterConfigHash = 3; optional string pathConfigHash = 4; + optional int64 clusterConfigLastUpdateTime = 5; + optional int64 pathConfigLastUpdateTime = 6; } enum ConfigStatus { @@ -77,6 +79,13 @@ message MasterInfo { optional string clusterId = 11; optional bool raftJournal = 12; repeated string raftAddress = 13; + repeated MasterVersion masterVersions = 14; +} + +message MasterVersion { + optional grpc.NetAddress addresses = 1; + optional string version = 2; + optional string state = 3; } enum MasterInfoField { @@ -93,6 +102,7 @@ enum MasterInfoField { CLUSTER_ID = 10; RAFT_JOURNAL = 11; RAFT_ADDRESSES = 12; + MASTER_VERSION = 13; } message GetMasterInfoPOptions { @@ -140,6 +150,25 @@ message BackupStatusPRequest { optional string backupId = 1; } +message ProxyStatus { + optional grpc.NetAddress address = 1; + optional string state = 2; + optional int64 startTime = 3; + optional int64 lastHeartbeatTime = 4; + optional grpc.BuildVersion version = 5; +} + +message ListProxyStatusPRequest { + optional ListProxyStatusPOptions options = 1; +} + +message ListProxyStatusPOptions { +} + +message ListProxyStatusPResponse { + repeated ProxyStatus proxyStatuses = 1; +} + /** * This interface contains meta master service endpoints for Alluxio clients. */ @@ -168,6 +197,11 @@ service MetaMasterClientService { * Creates a checkpoint in the primary master journal system. */ rpc Checkpoint(CheckpointPOptions) returns (CheckpointPResponse); + + /** + * Returns the status of all known Proxy instances in the cluster.
+ */ + rpc ListProxyStatus(ListProxyStatusPRequest) returns (ListProxyStatusPResponse); } message SetPathConfigurationPOptions {} @@ -237,6 +271,10 @@ enum MetaCommand { message RegisterMasterPOptions { repeated grpc.ConfigProperty configs = 1; + optional int64 startTimeMs = 2; + optional int64 losePrimacyTimeMs = 3; + optional string version = 4; + optional string revision = 5; } message RegisterMasterPRequest { optional int64 masterId = 1; @@ -244,7 +282,10 @@ message RegisterMasterPRequest { } message RegisterMasterPResponse {} -message MasterHeartbeatPOptions {} +message MasterHeartbeatPOptions { + optional int64 lastCheckpointTime = 1; + optional int64 journalEntriesSinceCheckpoint = 2; +} message MasterHeartbeatPRequest { optional int64 masterId = 1; optional MasterHeartbeatPOptions options = 2; @@ -281,3 +322,24 @@ service MetaMasterMasterService { */ rpc MasterHeartbeat(MasterHeartbeatPRequest) returns (MasterHeartbeatPResponse); } + +message ProxyHeartbeatPOptions { + optional grpc.NetAddress proxyAddress = 1; + optional int64 startTime = 2; + optional grpc.BuildVersion version = 3; +} +message ProxyHeartbeatPRequest { + optional ProxyHeartbeatPOptions options = 1; +} +message ProxyHeartbeatPResponse { +} + +/** + * This interface contains meta master service endpoints for Alluxio Proxy instances. + */ +service MetaMasterProxyService { + /** + * Stateless heartbeat from proxy instances to report the current status. + */ + rpc ProxyHeartbeat(ProxyHeartbeatPRequest) returns (ProxyHeartbeatPResponse); +} diff --git a/core/transport/src/main/proto/grpc/raft_journal.proto b/core/transport/src/main/proto/grpc/raft_journal.proto index c6f6dc5b2001..dee7f92cddbe 100644 --- a/core/transport/src/main/proto/grpc/raft_journal.proto +++ b/core/transport/src/main/proto/grpc/raft_journal.proto @@ -9,13 +9,14 @@ package alluxio.grpc.meta; import "grpc/common.proto"; message JournalQueryRequest { - optional GetSnapshotInfoRequest snapshotInfoRequest = 1; - optional GetSnapshotRequest snapshotRequest = 2; + optional GetSnapshotInfoRequest snapshotInfoRequest = 1 [deprecated = true]; + optional GetSnapshotRequest snapshotRequest = 2 [deprecated = true]; optional AddQuorumServerRequest addQuorumServerRequest = 3; } message JournalQueryResponse { - optional GetSnapshotInfoResponse snapshotInfoResponse = 1; + option deprecated = true; + optional GetSnapshotInfoResponse snapshotInfoResponse = 1 [deprecated = true]; } message AddQuorumServerRequest { @@ -23,45 +24,55 @@ message AddQuorumServerRequest { } message GetSnapshotInfoRequest { - optional SnapshotMetadata snapshotInfo = 1; + option deprecated = true; + optional SnapshotMetadata snapshotInfo = 1 [deprecated = true]; } message GetSnapshotInfoResponse { - optional SnapshotMetadata latest = 1; + option deprecated = true; + optional SnapshotMetadata latest = 1 [deprecated = true]; } message GetSnapshotRequest { + option deprecated = true; } message SnapshotMetadata { optional int64 snapshotTerm = 1; optional int64 snapshotIndex = 2; + optional bool exists = 3; } message SnapshotData { optional int64 snapshotTerm = 1; optional int64 snapshotIndex = 2; optional bytes chunk = 3; - optional int64 offset = 4; - optional bool eof = 5; + optional int64 offset = 4 [deprecated = true]; + optional bool eof = 5 [deprecated = true]; } message UploadSnapshotPRequest { - optional SnapshotData data = 1; + option deprecated = true; + optional SnapshotData data = 1 [deprecated = true]; } message UploadSnapshotPResponse { - optional int64 offsetReceived = 1; + option 
deprecated = true; + optional int64 offsetReceived = 1 [deprecated = true]; } message DownloadSnapshotPRequest { - optional int64 offsetReceived = 1; + option deprecated = true; + optional int64 offsetReceived = 1 [deprecated = true]; } message DownloadSnapshotPResponse { - optional SnapshotData data = 1; + option deprecated = true; + optional SnapshotData data = 1 [deprecated = true]; } +message LatestSnapshotInfoPRequest {} + /** * This interface contains raft service endpoints for Alluxio masters. */ @@ -70,10 +81,24 @@ service RaftJournalService { /** * Uploads a snapshot to primary master. */ - rpc UploadSnapshot (stream UploadSnapshotPRequest) returns (stream UploadSnapshotPResponse); + rpc UploadSnapshot (stream UploadSnapshotPRequest) returns (stream UploadSnapshotPResponse) { + option deprecated = true; + }; /** * Downloads a snapshot from primary master. */ - rpc DownloadSnapshot (stream DownloadSnapshotPRequest) returns (stream DownloadSnapshotPResponse); + rpc DownloadSnapshot (stream DownloadSnapshotPRequest) returns (stream DownloadSnapshotPResponse) { + option deprecated = true; + }; + + /** + * Requests information about snapshots on a particular machine. + */ + rpc RequestLatestSnapshotInfo(LatestSnapshotInfoPRequest) returns (SnapshotMetadata) {} + + /** + * Requests to download the snapshot data from a particular machine. + */ + rpc RequestLatestSnapshotData(SnapshotMetadata) returns (stream SnapshotData) {} } diff --git a/core/transport/src/main/proto/grpc/version.proto b/core/transport/src/main/proto/grpc/version.proto index 8126353ecd9e..de22bfe008b3 100644 --- a/core/transport/src/main/proto/grpc/version.proto +++ b/core/transport/src/main/proto/grpc/version.proto @@ -22,9 +22,11 @@ enum ServiceType { META_MASTER_CONFIG_SERVICE = 6; META_MASTER_CLIENT_SERVICE = 7; META_MASTER_MASTER_SERVICE = 8; + META_MASTER_PROXY_SERVICE = 18; METRICS_MASTER_CLIENT_SERVICE = 9; JOB_MASTER_CLIENT_SERVICE = 10; JOB_MASTER_WORKER_SERVICE = 11; + JOB_MASTER_MASTER_SERVICE = 19; JOURNAL_MASTER_CLIENT_SERVICE = 13; TABLE_MASTER_CLIENT_SERVICE = 14; META_MASTER_BACKUP_MESSAGING_SERVICE = 15; @@ -34,6 +36,27 @@ message GetServiceVersionPRequest { optional ServiceType serviceType = 1; + // The purpose of this field is to make the grpc services on standby masters work without + // client changes and to keep backwards compatibility. + // Requests to this endpoint will be rejected on standby masters by default, + // unless this field is set. + // Two places use this request: + // 1. PollingMasterInquireClient uses this endpoint to tell who is the primary master. + // 2. AbstractClient uses this endpoint to verify the version before it RPCs with the master. + // + // Behaviors: + // 1. old clients -> new cluster standby masters + // PollingMasterInquireClient does not set this field and is able to tell which one is the primary master because + // the request will be rejected on the standby master. + // AbstractClient does not set this field. + // Old clients only connect to the primary so this doesn't break the existing behavior. + // + // 2. new clients -> new cluster standby masters + // PollingMasterInquireClient does not set this field and is able to tell which one is the primary master because + // the request will be rejected on the standby master. + // AbstractClient sets this field to true. RPCs to standby masters can go through and pass the version verification.
+ + optional bool allowedOnStandbyMasters = 2; } message GetServiceVersionPResponse { optional int64 version = 1; diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 03c42fd97bd6..1966d9f90bd7 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -112,6 +112,10 @@ { "name": "BUILD_VERSION", "integer": 11 + }, + { + "name": "NUM_VCPU", + "integer": 12 } ] } @@ -287,6 +291,11 @@ "id": 10, "name": "buildVersion", "type": "BuildVersion" + }, + { + "id": 11, + "name": "numVCpu", + "type": "int32" } ], "maps": [ @@ -365,17 +374,22 @@ ] }, { - "name": "RemoveDecommissionedWorkerPOptions", + "name": "RemoveDisabledWorkerPOptions", "fields": [ { "id": 1, - "name": "workerName", + "name": "workerHostname", "type": "string" + }, + { + "id": 2, + "name": "workerWebPort", + "type": "int64" } ] }, { - "name": "RemoveDecommissionedWorkerPResponse" + "name": "RemoveDisabledWorkerPResponse" }, { "name": "GetWorkerLostStoragePOptions" @@ -391,6 +405,29 @@ } ] }, + { + "name": "DecommissionWorkerPResponse" + }, + { + "name": "DecommissionWorkerPOptions", + "fields": [ + { + "id": 1, + "name": "workerHostname", + "type": "string" + }, + { + "id": 2, + "name": "workerWebPort", + "type": "int64" + }, + { + "id": 3, + "name": "canRegisterAgain", + "type": "bool" + } + ] + }, { "name": "TierList", "fields": [ @@ -595,6 +632,32 @@ { "name": "CommitBlockInUfsPResponse" }, + { + "name": "NotifyWorkerIdPOptions" + }, + { + "name": "NotifyWorkerIdPRequest", + "fields": [ + { + "id": 1, + "name": "workerId", + "type": "int64" + }, + { + "id": 2, + "name": "workerNetAddress", + "type": "grpc.WorkerNetAddress" + }, + { + "id": 3, + "name": "options", + "type": "NotifyWorkerIdPOptions" + } + ] + }, + { + "name": "NotifyWorkerIdPResponse" + }, { "name": "GetWorkerIdPOptions" }, @@ -666,21 +729,6 @@ } ] }, - { - "name": "BuildVersion", - "fields": [ - { - "id": 1, - "name": "version", - "type": "string" - }, - { - "id": 2, - "name": "revision", - "type": "string" - } - ] - }, { "name": "RegisterWorkerPOptions", "fields": [ @@ -694,6 +742,11 @@ "id": 2, "name": "buildVersion", "type": "BuildVersion" + }, + { + "id": 3, + "name": "numVCpu", + "type": "int32" } ] }, @@ -792,9 +845,9 @@ "out_type": "GetWorkerInfoListPResponse" }, { - "name": "RemoveDecommissionedWorker", - "in_type": "RemoveDecommissionedWorkerPOptions", - "out_type": "RemoveDecommissionedWorkerPResponse" + "name": "RemoveDisabledWorker", + "in_type": "RemoveDisabledWorkerPOptions", + "out_type": "RemoveDisabledWorkerPResponse" }, { "name": "GetWorkerReport", @@ -805,6 +858,11 @@ "name": "GetWorkerLostStorage", "in_type": "GetWorkerLostStoragePOptions", "out_type": "GetWorkerLostStoragePResponse" + }, + { + "name": "DecommissionWorker", + "in_type": "DecommissionWorkerPOptions", + "out_type": "DecommissionWorkerPResponse" } ] }, @@ -831,6 +889,11 @@ "in_type": "GetWorkerIdPRequest", "out_type": "GetWorkerIdPResponse" }, + { + "name": "NotifyWorkerId", + "in_type": "NotifyWorkerIdPRequest", + "out_type": "NotifyWorkerIdPResponse" + }, { "name": "RegisterWorker", "in_type": "RegisterWorkerPRequest", @@ -1061,6 +1124,11 @@ "id": 1, "name": "offset", "type": "int64" + }, + { + "id": 2, + "name": "contentHash", + "type": "string" } ] }, @@ -1162,6 +1230,11 @@ "id": 3, "name": "bandwidth", "type": "int64" + }, + { + "id": 4, + "name": "user", + "type": "string" } ] }, @@ -1513,6 +1586,10 @@ "name": "METER", "integer": 2 }, + { + "name": "HISTOGRAM", + "integer": 5 + 
}, { "name": "TIMER", "integer": 3 @@ -1548,6 +1625,14 @@ { "name": "Persist", "integer": 5 + }, + { + "name": "Decommissioned", + "integer": 6 + }, + { + "name": "Disabled", + "integer": 7 } ] }, @@ -1560,6 +1645,10 @@ { "name": "FREE", "integer": 1 + }, + { + "name": "DELETE_ALLUXIO", + "integer": 2 } ] }, @@ -1838,6 +1927,21 @@ "type": "ErrorType" } ] + }, + { + "name": "BuildVersion", + "fields": [ + { + "id": 1, + "name": "version", + "type": "string" + }, + { + "id": 2, + "name": "revision", + "type": "string" + } + ] } ], "package": { @@ -2038,6 +2142,43 @@ "integer": 3 } ] + }, + { + "name": "JobProgressReportFormat", + "enum_fields": [ + { + "name": "TEXT", + "integer": 1 + }, + { + "name": "JSON", + "integer": 2 + } + ] + }, + { + "name": "SyncMetadataState", + "enum_fields": [ + { + "name": "UNKNOWN" + }, + { + "name": "RUNNING", + "integer": 1 + }, + { + "name": "SUCCEEDED", + "integer": 2 + }, + { + "name": "FAILED", + "integer": 3 + }, + { + "name": "CANCELED", + "integer": 4 + } + ] } ], "messages": [ @@ -2195,6 +2336,11 @@ "id": 3, "name": "commonOptions", "type": "FileSystemMasterCommonPOptions" + }, + { + "id": 4, + "name": "contentHash", + "type": "string" } ] }, @@ -2241,6 +2387,11 @@ "value": "true" } ] + }, + { + "id": 15, + "name": "ufsReadWorkerLocation", + "type": "grpc.WorkerNetAddress" } ] }, @@ -2285,6 +2436,11 @@ "value": "NEW_PATHS" } ] + }, + { + "id": 8, + "name": "checkS3BucketPath", + "type": "bool" } ], "maps": [ @@ -2386,6 +2542,21 @@ "value": "NEW_PATHS" } ] + }, + { + "id": 13, + "name": "overwrite", + "type": "bool" + }, + { + "id": 14, + "name": "checkS3BucketPath", + "type": "bool" + }, + { + "id": 15, + "name": "workerLocation", + "type": "grpc.WorkerNetAddress" } ], "maps": [ @@ -2440,6 +2611,11 @@ "name": "commonOptions", "type": "FileSystemMasterCommonPOptions" }, + { + "id": 5, + "name": "syncParentNextTime", + "type": "bool" + }, { "id": 6, "name": "deleteMountPoint", @@ -2573,6 +2749,11 @@ "value": "true" } ] + }, + { + "id": 5, + "name": "includeRealContentHash", + "type": "bool" } ] }, @@ -2673,6 +2854,16 @@ "id": 5, "name": "loadMetadataOnly", "type": "bool" + }, + { + "id": 6, + "name": "disableAreDescendantsLoadedCheck", + "type": "bool" + }, + { + "id": 7, + "name": "excludeMountInfo", + "type": "bool" } ] }, @@ -3099,6 +3290,11 @@ "id": 4, "name": "commonOptions", "type": "FileSystemMasterCommonPOptions" + }, + { + "id": 5, + "name": "remount", + "type": "bool" } ], "maps": [ @@ -3269,6 +3465,21 @@ } ] }, + { + "name": "S3SyntaxOptions", + "fields": [ + { + "id": 1, + "name": "overwrite", + "type": "bool" + }, + { + "id": 2, + "name": "isMultipartUpload", + "type": "bool" + } + ] + }, { "name": "RenamePResponse" }, @@ -3284,6 +3495,11 @@ "id": 2, "name": "persist", "type": "bool" + }, + { + "id": 3, + "name": "s3SyntaxOptions", + "type": "S3SyntaxOptions" } ] }, @@ -3388,6 +3604,11 @@ "id": 12, "name": "xattrUpdateStrategy", "type": "alluxio.proto.journal.XAttrUpdateStrategy" + }, + { + "id": 13, + "name": "directChildrenLoaded", + "type": "bool" } ], "maps": [ @@ -3676,83 +3897,403 @@ "name": "NeedsSyncResponse" }, { - "name": "FileSystemHeartbeatPResponse", + "name": "SubmitJobPRequest", "fields": [ { "id": 1, - "name": "command", - "type": "FileSystemCommand" + "name": "request_body", + "type": "bytes" } ] }, { - "name": "FileSystemHeartbeatPOptions", + "name": "SubmitJobPResponse", "fields": [ { "id": 1, - "name": "persistedFileFingerprints", - "type": "string", - "is_repeated": true + "name": "jobId", + "type": "string" } ] }, { - 
"name": "FileSystemHeartbeatPRequest", + "name": "LoadJobPOptions", "fields": [ { "id": 1, - "name": "workerId", + "name": "bandwidth", "type": "int64" }, { "id": 2, - "name": "persistedFiles", - "type": "int64", - "is_repeated": true + "name": "verify", + "type": "bool" }, { "id": 3, - "name": "options", - "type": "FileSystemHeartbeatPOptions" - } - ] - }, - { - "name": "GetFileInfoPResponse", - "fields": [ - { - "id": 1, - "name": "fileInfo", - "type": "FileInfo" + "name": "partialListing", + "type": "bool" } ] }, { - "name": "GetFileInfoPOptions" - }, - { - "name": "GetFileInfoPRequest", + "name": "CopyJobPOptions", "fields": [ { "id": 1, - "name": "fileId", + "name": "bandwidth", "type": "int64" }, { "id": 2, - "name": "options", - "type": "GetFileInfoPOptions" + "name": "verify", + "type": "bool" + }, + { + "id": 3, + "name": "partialListing", + "type": "bool" + }, + { + "id": 4, + "name": "overwrite", + "type": "bool" } ] }, { - "name": "GetPinnedFileIdsPResponse", + "name": "StopJobPRequest", "fields": [ { "id": 1, - "name": "pinnedFileIds", - "type": "int64", - "is_repeated": true + "name": "jobDescription", + "type": "JobDescription" + } + ] + }, + { + "name": "StopJobPResponse", + "fields": [ + { + "id": 1, + "name": "jobStopped", + "type": "bool" + } + ] + }, + { + "name": "JobDescription", + "fields": [ + { + "id": 1, + "name": "type", + "type": "string" + }, + { + "id": 2, + "name": "path", + "type": "string" + } + ] + }, + { + "name": "JobProgressPOptions", + "fields": [ + { + "id": 1, + "name": "format", + "type": "JobProgressReportFormat" + }, + { + "id": 2, + "name": "verbose", + "type": "bool" + } + ] + }, + { + "name": "GetJobProgressPRequest", + "fields": [ + { + "id": 1, + "name": "jobDescription", + "type": "JobDescription" + }, + { + "id": 2, + "name": "options", + "type": "JobProgressPOptions" + } + ] + }, + { + "name": "GetJobProgressPResponse", + "fields": [ + { + "id": 1, + "name": "progressReport", + "type": "string" + }, + { + "id": 2, + "name": "format", + "type": "JobProgressReportFormat" + } + ] + }, + { + "name": "SyncMetadataPOptions", + "fields": [ + { + "id": 1, + "name": "loadDescendantType", + "type": "fscommon.LoadDescendantPType" + }, + { + "id": 2, + "name": "directoryLoadType", + "type": "fscommon.DirectoryLoadPType" + } + ] + }, + { + "name": "SyncMetadataPRequest", + "fields": [ + { + "id": 1, + "name": "path", + "type": "string" + }, + { + "id": 2, + "name": "options", + "type": "SyncMetadataPOptions" + } + ] + }, + { + "name": "SyncMetadataTask", + "fields": [ + { + "id": 1, + "name": "id", + "type": "int64" + }, + { + "id": 2, + "name": "state", + "type": "SyncMetadataState" + }, + { + "id": 3, + "name": "syncDurationMs", + "type": "int64" + }, + { + "id": 4, + "name": "exception", + "type": "Exception" + }, + { + "id": 5, + "name": "successOpCount", + "type": "int64" + }, + { + "id": 100, + "name": "taskInfoString", + "type": "string" + }, + { + "id": 101, + "name": "taskStatString", + "type": "string" + } + ], + "messages": [ + { + "name": "Exception", + "fields": [ + { + "id": 1, + "name": "exceptionType", + "type": "string" + }, + { + "id": 2, + "name": "exceptionMessage", + "type": "string" + }, + { + "id": 3, + "name": "stacktrace", + "type": "string" + } + ] + } + ] + }, + { + "name": "SyncMetadataPResponse", + "fields": [ + { + "id": 1, + "name": "task", + "type": "SyncMetadataTask", + "is_repeated": true + }, + { + "id": 1000, + "name": "debugInfo", + "type": "string" + } + ] + }, + { + "name": "SyncMetadataAsyncPResponse", + 
"fields": [ + { + "id": 1, + "name": "submitted", + "type": "bool" + }, + { + "id": 2, + "name": "taskGroupId", + "type": "int64" + }, + { + "id": 3, + "name": "taskIds", + "type": "int64", + "is_repeated": true + } + ] + }, + { + "name": "GetSyncProgressPRequest", + "fields": [ + { + "id": 1, + "name": "taskGroupId", + "type": "int64" + } + ] + }, + { + "name": "GetSyncProgressPResponse", + "fields": [ + { + "id": 1, + "name": "task", + "type": "SyncMetadataTask", + "is_repeated": true + }, + { + "id": 1000, + "name": "debugInfo", + "type": "string" + } + ] + }, + { + "name": "CancelSyncMetadataPRequest", + "fields": [ + { + "id": 1, + "name": "taskGroupId", + "type": "int64" + } + ] + }, + { + "name": "CancelSyncMetadataPResponse", + "fields": [ + { + "id": 1, + "name": "success", + "type": "bool" + } + ] + }, + { + "name": "FileSystemHeartbeatPResponse", + "fields": [ + { + "id": 1, + "name": "command", + "type": "FileSystemCommand" + } + ] + }, + { + "name": "FileSystemHeartbeatPOptions", + "fields": [ + { + "id": 1, + "name": "persistedFileFingerprints", + "type": "string", + "is_repeated": true + } + ] + }, + { + "name": "FileSystemHeartbeatPRequest", + "fields": [ + { + "id": 1, + "name": "workerId", + "type": "int64" + }, + { + "id": 2, + "name": "persistedFiles", + "type": "int64", + "is_repeated": true + }, + { + "id": 3, + "name": "options", + "type": "FileSystemHeartbeatPOptions" + } + ] + }, + { + "name": "GetFileInfoPResponse", + "fields": [ + { + "id": 1, + "name": "fileInfo", + "type": "FileInfo" + } + ] + }, + { + "name": "GetFileInfoPOptions" + }, + { + "name": "GetFileInfoPRequest", + "fields": [ + { + "id": 1, + "name": "fileId", + "type": "int64" + }, + { + "id": 2, + "name": "options", + "type": "GetFileInfoPOptions" + } + ] + }, + { + "name": "GetPinnedFileIdsPResponse", + "fields": [ + { + "id": 1, + "name": "pinnedFileIds", + "type": "int64", + "is_repeated": true } ] }, @@ -3942,6 +4483,41 @@ "name": "NeedsSync", "in_type": "NeedsSyncRequest", "out_type": "NeedsSyncResponse" + }, + { + "name": "submitJob", + "in_type": "SubmitJobPRequest", + "out_type": "SubmitJobPResponse" + }, + { + "name": "StopJob", + "in_type": "StopJobPRequest", + "out_type": "StopJobPResponse" + }, + { + "name": "GetJobProgress", + "in_type": "GetJobProgressPRequest", + "out_type": "GetJobProgressPResponse" + }, + { + "name": "SyncMetadata", + "in_type": "SyncMetadataPRequest", + "out_type": "SyncMetadataPResponse" + }, + { + "name": "SyncMetadataAsync", + "in_type": "SyncMetadataPRequest", + "out_type": "SyncMetadataAsyncPResponse" + }, + { + "name": "GetSyncProgress", + "in_type": "GetSyncProgressPRequest", + "out_type": "GetSyncProgressPResponse" + }, + { + "name": "CancelSyncMetadata", + "in_type": "CancelSyncMetadataPRequest", + "out_type": "CancelSyncMetadataPResponse" } ] }, @@ -4035,6 +4611,22 @@ "integer": 2 } ] + }, + { + "name": "DirectoryLoadPType", + "enum_fields": [ + { + "name": "SINGLE_LISTING" + }, + { + "name": "BFS", + "integer": 1 + }, + { + "name": "DFS", + "integer": 2 + } + ] } ], "package": { @@ -4141,6 +4733,22 @@ "integer": 3 } ] + }, + { + "name": "JobMasterMetaCommand", + "enum_fields": [ + { + "name": "MetaCommand_Unknown" + }, + { + "name": "MetaCommand_Nothing", + "integer": 1 + }, + { + "name": "MetaCommand_Register", + "integer": 2 + } + ] } ], "messages": [ @@ -4413,6 +5021,11 @@ "id": 7, "name": "unfinishedTasks", "type": "int32" + }, + { + "id": 8, + "name": "version", + "type": "grpc.BuildVersion" } ] }, @@ -4661,35 +5274,84 @@ ] }, { - "name": 
"GetJobServiceSummaryPResponse", + "name": "GetJobServiceSummaryPResponse", + "fields": [ + { + "id": 1, + "name": "summary", + "type": "JobServiceSummary" + } + ] + }, + { + "name": "GetAllWorkerHealthPOptions" + }, + { + "name": "GetAllWorkerHealthPRequest", + "fields": [ + { + "id": 1, + "name": "options", + "type": "GetAllWorkerHealthPOptions" + } + ] + }, + { + "name": "GetAllWorkerHealthPResponse", + "fields": [ + { + "id": 1, + "name": "workerHealths", + "type": "JobWorkerHealth", + "is_repeated": true + } + ] + }, + { + "name": "JobMasterStatus", "fields": [ { "id": 1, - "name": "summary", - "type": "JobServiceSummary" + "name": "state", + "type": "string" + }, + { + "id": 2, + "name": "masterAddress", + "type": "grpc.NetAddress" + }, + { + "id": 3, + "name": "startTime", + "type": "int64" + }, + { + "id": 4, + "name": "version", + "type": "grpc.BuildVersion" } ] }, { - "name": "GetAllWorkerHealthPOptions" + "name": "GetAllMasterStatusPOptions" }, { - "name": "GetAllWorkerHealthPRequest", + "name": "GetAllMasterStatusPRequest", "fields": [ { "id": 1, "name": "options", - "type": "GetAllWorkerHealthPOptions" + "type": "GetAllMasterStatusPOptions" } ] }, { - "name": "GetAllWorkerHealthPResponse", + "name": "GetAllMasterStatusPResponse", "fields": [ { "id": 1, - "name": "workerHealths", - "type": "JobWorkerHealth", + "name": "jobMasterStatus", + "type": "JobMasterStatus", "is_repeated": true } ] @@ -4874,6 +5536,11 @@ "id": 2, "name": "options", "type": "RegisterJobWorkerPOptions" + }, + { + "id": 3, + "name": "version", + "type": "grpc.BuildVersion" } ] }, @@ -4886,6 +5553,100 @@ "type": "int64" } ] + }, + { + "name": "GetJobMasterIdPOptions" + }, + { + "name": "GetJobMasterIdPRequest", + "fields": [ + { + "id": 1, + "name": "masterAddress", + "type": "grpc.NetAddress" + }, + { + "id": 2, + "name": "options", + "type": "GetJobMasterIdPOptions" + } + ] + }, + { + "name": "GetJobMasterIdPResponse", + "fields": [ + { + "id": 1, + "name": "masterId", + "type": "int64" + } + ] + }, + { + "name": "RegisterJobMasterPOptions", + "fields": [ + { + "id": 2, + "name": "startTimeMs", + "type": "int64" + }, + { + "id": 3, + "name": "losePrimacyTimeMs", + "type": "int64" + }, + { + "id": 4, + "name": "version", + "type": "grpc.BuildVersion" + } + ] + }, + { + "name": "RegisterJobMasterPRequest", + "fields": [ + { + "id": 1, + "name": "jobMasterId", + "type": "int64" + }, + { + "id": 2, + "name": "options", + "type": "RegisterJobMasterPOptions" + } + ] + }, + { + "name": "RegisterJobMasterPResponse" + }, + { + "name": "JobMasterHeartbeatPOptions" + }, + { + "name": "JobMasterHeartbeatPRequest", + "fields": [ + { + "id": 1, + "name": "masterId", + "type": "int64" + }, + { + "id": 2, + "name": "options", + "type": "JobMasterHeartbeatPOptions" + } + ] + }, + { + "name": "JobMasterHeartbeatPResponse", + "fields": [ + { + "id": 1, + "name": "command", + "type": "JobMasterMetaCommand" + } + ] } ], "services": [ @@ -4927,6 +5688,11 @@ "in_type": "GetAllWorkerHealthPRequest", "out_type": "GetAllWorkerHealthPResponse" }, + { + "name": "GetAllMasterStatus", + "in_type": "GetAllMasterStatusPRequest", + "out_type": "GetAllMasterStatusPResponse" + }, { "name": "Submit", "in_type": "SubmitRequest", @@ -4958,6 +5724,26 @@ "out_type": "RegisterJobWorkerPResponse" } ] + }, + { + "name": "JobMasterMasterService", + "rpcs": [ + { + "name": "GetMasterId", + "in_type": "GetJobMasterIdPRequest", + "out_type": "GetJobMasterIdPResponse" + }, + { + "name": "RegisterMaster", + "in_type": "RegisterJobMasterPRequest", + 
"out_type": "RegisterJobMasterPResponse" + }, + { + "name": "MasterHeartbeat", + "in_type": "JobMasterHeartbeatPRequest", + "out_type": "JobMasterHeartbeatPResponse" + } + ] } ], "imports": [ @@ -5440,6 +6226,10 @@ { "name": "RAFT_ADDRESSES", "integer": 12 + }, + { + "name": "MASTER_VERSION", + "integer": 13 } ] }, @@ -5539,6 +6329,16 @@ "id": 4, "name": "pathConfigHash", "type": "string" + }, + { + "id": 5, + "name": "clusterConfigLastUpdateTime", + "type": "int64" + }, + { + "id": 6, + "name": "pathConfigLastUpdateTime", + "type": "int64" } ], "maps": [ @@ -5706,6 +6506,32 @@ "name": "raftAddress", "type": "string", "is_repeated": true + }, + { + "id": 14, + "name": "masterVersions", + "type": "MasterVersion", + "is_repeated": true + } + ] + }, + { + "name": "MasterVersion", + "fields": [ + { + "id": 1, + "name": "addresses", + "type": "grpc.NetAddress" + }, + { + "id": 2, + "name": "version", + "type": "string" + }, + { + "id": 3, + "name": "state", + "type": "string" } ] }, @@ -5828,6 +6654,60 @@ } ] }, + { + "name": "ProxyStatus", + "fields": [ + { + "id": 1, + "name": "address", + "type": "grpc.NetAddress" + }, + { + "id": 2, + "name": "state", + "type": "string" + }, + { + "id": 3, + "name": "startTime", + "type": "int64" + }, + { + "id": 4, + "name": "lastHeartbeatTime", + "type": "int64" + }, + { + "id": 5, + "name": "version", + "type": "grpc.BuildVersion" + } + ] + }, + { + "name": "ListProxyStatusPRequest", + "fields": [ + { + "id": 1, + "name": "options", + "type": "ListProxyStatusPOptions" + } + ] + }, + { + "name": "ListProxyStatusPOptions" + }, + { + "name": "ListProxyStatusPResponse", + "fields": [ + { + "id": 1, + "name": "proxyStatuses", + "type": "ProxyStatus", + "is_repeated": true + } + ] + }, { "name": "SetPathConfigurationPOptions" }, @@ -5940,6 +6820,26 @@ "name": "configs", "type": "grpc.ConfigProperty", "is_repeated": true + }, + { + "id": 2, + "name": "startTimeMs", + "type": "int64" + }, + { + "id": 3, + "name": "losePrimacyTimeMs", + "type": "int64" + }, + { + "id": 4, + "name": "version", + "type": "string" + }, + { + "id": 5, + "name": "revision", + "type": "string" } ] }, @@ -5962,7 +6862,19 @@ "name": "RegisterMasterPResponse" }, { - "name": "MasterHeartbeatPOptions" + "name": "MasterHeartbeatPOptions", + "fields": [ + { + "id": 1, + "name": "lastCheckpointTime", + "type": "int64" + }, + { + "id": 2, + "name": "journalEntriesSinceCheckpoint", + "type": "int64" + } + ] }, { "name": "MasterHeartbeatPRequest", @@ -6014,6 +6926,39 @@ } } ] + }, + { + "name": "ProxyHeartbeatPOptions", + "fields": [ + { + "id": 1, + "name": "proxyAddress", + "type": "grpc.NetAddress" + }, + { + "id": 2, + "name": "startTime", + "type": "int64" + }, + { + "id": 3, + "name": "version", + "type": "grpc.BuildVersion" + } + ] + }, + { + "name": "ProxyHeartbeatPRequest", + "fields": [ + { + "id": 1, + "name": "options", + "type": "ProxyHeartbeatPOptions" + } + ] + }, + { + "name": "ProxyHeartbeatPResponse" } ], "services": [ @@ -6044,6 +6989,11 @@ "name": "Checkpoint", "in_type": "CheckpointPOptions", "out_type": "CheckpointPResponse" + }, + { + "name": "ListProxyStatus", + "in_type": "ListProxyStatusPRequest", + "out_type": "ListProxyStatusPResponse" } ] }, @@ -6096,6 +7046,16 @@ "out_type": "MasterHeartbeatPResponse" } ] + }, + { + "name": "MetaMasterProxyService", + "rpcs": [ + { + "name": "ProxyHeartbeat", + "in_type": "ProxyHeartbeatPRequest", + "out_type": "ProxyHeartbeatPResponse" + } + ] } ], "imports": [ @@ -6265,12 +7225,24 @@ { "id": 1, "name": "snapshotInfoRequest", - 
"type": "GetSnapshotInfoRequest" + "type": "GetSnapshotInfoRequest", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "id": 2, "name": "snapshotRequest", - "type": "GetSnapshotRequest" + "type": "GetSnapshotRequest", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "id": 3, @@ -6285,7 +7257,19 @@ { "id": 1, "name": "snapshotInfoResponse", - "type": "GetSnapshotInfoResponse" + "type": "GetSnapshotInfoResponse", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, @@ -6305,7 +7289,19 @@ { "id": 1, "name": "snapshotInfo", - "type": "SnapshotMetadata" + "type": "SnapshotMetadata", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, @@ -6315,12 +7311,30 @@ { "id": 1, "name": "latest", - "type": "SnapshotMetadata" + "type": "SnapshotMetadata", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, { - "name": "GetSnapshotRequest" + "name": "GetSnapshotRequest", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "name": "SnapshotMetadata", @@ -6334,6 +7348,11 @@ "id": 2, "name": "snapshotIndex", "type": "int64" + }, + { + "id": 3, + "name": "exists", + "type": "bool" } ] }, @@ -6358,12 +7377,24 @@ { "id": 4, "name": "offset", - "type": "int64" + "type": "int64", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "id": 5, "name": "eof", - "type": "bool" + "type": "bool", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] } ] }, @@ -6373,7 +7404,19 @@ { "id": 1, "name": "data", - "type": "SnapshotData" + "type": "SnapshotData", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, @@ -6383,7 +7426,19 @@ { "id": 1, "name": "offsetReceived", - "type": "int64" + "type": "int64", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, @@ -6393,7 +7448,19 @@ { "id": 1, "name": "offsetReceived", - "type": "int64" + "type": "int64", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, @@ -6403,9 +7470,24 @@ { "id": 1, "name": "data", - "type": "SnapshotData" + "type": "SnapshotData", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] + }, + { + "name": "LatestSnapshotInfoPRequest" } ], "services": [ @@ -6417,13 +7499,36 @@ "in_type": "UploadSnapshotPRequest", "out_type": "UploadSnapshotPResponse", "in_streamed": true, - "out_streamed": true + "out_streamed": true, + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "name": "DownloadSnapshot", "in_type": "DownloadSnapshotPRequest", "out_type": "DownloadSnapshotPResponse", "in_streamed": true, + "out_streamed": true, + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + }, + { + "name": "RequestLatestSnapshotInfo", + "in_type": "LatestSnapshotInfoPRequest", + "out_type": "SnapshotMetadata" + }, + { + "name": "RequestLatestSnapshotData", + "in_type": "SnapshotMetadata", + "out_type": "SnapshotData", "out_streamed": true } ] @@ -7983,6 +9088,10 @@ "name": 
"META_MASTER_MASTER_SERVICE", "integer": 8 }, + { + "name": "META_MASTER_PROXY_SERVICE", + "integer": 18 + }, { "name": "METRICS_MASTER_CLIENT_SERVICE", "integer": 9 @@ -7995,6 +9104,10 @@ "name": "JOB_MASTER_WORKER_SERVICE", "integer": 11 }, + { + "name": "JOB_MASTER_MASTER_SERVICE", + "integer": 19 + }, { "name": "JOURNAL_MASTER_CLIENT_SERVICE", "integer": 13 @@ -8032,6 +9145,11 @@ "id": 1, "name": "serviceType", "type": "ServiceType" + }, + { + "id": 2, + "name": "allowedOnStandbyMasters", + "type": "bool" } ] }, @@ -8358,6 +9476,11 @@ "id": 2, "name": "length", "type": "int64" + }, + { + "id": 3, + "name": "block_location", + "type": "grpc.BlockLocation" } ] }, @@ -8372,6 +9495,11 @@ ] } ], + "imports": [ + { + "path": "grpc/common.proto" + } + ], "package": { "name": "alluxio.proto.journal" } @@ -8411,6 +9539,10 @@ { "name": "FREE", "integer": 1 + }, + { + "name": "DELETE_ALLUXIO", + "integer": 2 } ] }, @@ -9379,6 +10511,139 @@ } } }, + { + "protopath": "proto:/:journal:/:job.proto", + "def": { + "enums": [ + { + "name": "PJobState", + "enum_fields": [ + { + "name": "CREATED", + "integer": 1 + }, + { + "name": "STOPPED", + "integer": 2 + }, + { + "name": "SUCCEEDED", + "integer": 3 + }, + { + "name": "FAILED", + "integer": 4 + } + ] + } + ], + "messages": [ + { + "name": "LoadJobEntry", + "fields": [ + { + "id": 1, + "name": "load_path", + "type": "string" + }, + { + "id": 2, + "name": "state", + "type": "PJobState" + }, + { + "id": 3, + "name": "bandwidth", + "type": "int64" + }, + { + "id": 4, + "name": "verify", + "type": "bool" + }, + { + "id": 5, + "name": "user", + "type": "string" + }, + { + "id": 6, + "name": "partialListing", + "type": "bool" + }, + { + "id": 7, + "name": "job_id", + "type": "string" + }, + { + "id": 8, + "name": "end_time", + "type": "int64" + } + ] + }, + { + "name": "CopyJobEntry", + "fields": [ + { + "id": 1, + "name": "src", + "type": "string" + }, + { + "id": 2, + "name": "dst", + "type": "string" + }, + { + "id": 3, + "name": "state", + "type": "PJobState" + }, + { + "id": 4, + "name": "bandwidth", + "type": "int64" + }, + { + "id": 5, + "name": "verify", + "type": "bool" + }, + { + "id": 6, + "name": "user", + "type": "string" + }, + { + "id": 7, + "name": "partialListing", + "type": "bool" + }, + { + "id": 8, + "name": "job_id", + "type": "string" + }, + { + "id": 9, + "name": "end_time", + "type": "int64" + }, + { + "id": 10, + "name": "overwrite", + "type": "int64" + } + ] + } + ], + "package": { + "name": "alluxio.proto.journal" + } + } + }, { "protopath": "proto:/:journal:/:journal.proto", "def": { @@ -9591,6 +10856,16 @@ "name": "update_inode_file", "type": "UpdateInodeFileEntry" }, + { + "id": 53, + "name": "load_job", + "type": "LoadJobEntry" + }, + { + "id": 54, + "name": "copy_job", + "type": "CopyJobEntry" + }, { "id": 39, "name": "journal_entries", @@ -9612,6 +10887,9 @@ }, { "path": "proto/journal/table.proto" + }, + { + "path": "proto/journal/job.proto" } ], "package": { diff --git a/core/transport/src/main/proto/proto/journal/block.proto b/core/transport/src/main/proto/proto/journal/block.proto index 3a605bde8a7e..0a03eca15196 100644 --- a/core/transport/src/main/proto/proto/journal/block.proto +++ b/core/transport/src/main/proto/proto/journal/block.proto @@ -2,6 +2,8 @@ syntax = "proto2"; package alluxio.proto.journal; +import "grpc/common.proto"; + // Journal entry messages for the block master. 
// next available id: 2 @@ -13,6 +15,7 @@ message BlockContainerIdGeneratorEntry { message BlockInfoEntry { optional int64 block_id = 1; optional int64 length = 2; + optional grpc.BlockLocation block_location = 3; } // next available id: 2 diff --git a/core/transport/src/main/proto/proto/journal/file.proto b/core/transport/src/main/proto/proto/journal/file.proto index c59840a2d2ec..6d5b789723a9 100644 --- a/core/transport/src/main/proto/proto/journal/file.proto +++ b/core/transport/src/main/proto/proto/journal/file.proto @@ -172,6 +172,7 @@ message InodeDirectoryIdGeneratorEntry { enum PTtlAction { DELETE = 0; FREE = 1; + DELETE_ALLUXIO = 2; } // next available id: 30 diff --git a/core/transport/src/main/proto/proto/journal/job.proto b/core/transport/src/main/proto/proto/journal/job.proto new file mode 100644 index 000000000000..dc7f5df48e3d --- /dev/null +++ b/core/transport/src/main/proto/proto/journal/job.proto @@ -0,0 +1,37 @@ +syntax = "proto2"; + +package alluxio.proto.journal; + +// Journal entry messages for jobs. +enum PJobState { + CREATED = 1; + STOPPED = 2; + SUCCEEDED = 3; + FAILED = 4; +} + +// next available id: 9 +message LoadJobEntry { + required string load_path = 1; + required PJobState state = 2; + optional int64 bandwidth = 3; + required bool verify = 4; + optional string user = 5; + required bool partialListing = 6; + required string job_id = 7; + optional int64 end_time = 8; +} + +// next available id: 11 +message CopyJobEntry { + required string src = 1; + required string dst = 2; + required PJobState state = 3; + optional int64 bandwidth = 4; + required bool verify = 5; + optional string user = 6; + required bool partialListing = 7; + required string job_id = 8; + optional int64 end_time = 9; + optional int64 overwrite = 10; +} diff --git a/core/transport/src/main/proto/proto/journal/journal.proto b/core/transport/src/main/proto/proto/journal/journal.proto index 31ed4bcdda90..3b91319a1ece 100644 --- a/core/transport/src/main/proto/proto/journal/journal.proto +++ b/core/transport/src/main/proto/proto/journal/journal.proto @@ -7,6 +7,7 @@ import "proto/journal/block.proto"; import "proto/journal/file.proto"; import "proto/journal/meta.proto"; import "proto/journal/table.proto"; +import "proto/journal/job.proto"; // Wraps around all types of Alluxio journal entries. // @@ -24,7 +25,7 @@ message JournalOpPId { optional int64 leastSignificantBits = 2; } -// next available id: 53 +// next available id: 55 message JournalEntry { // shared fields. optional int64 sequence_number = 1; @@ -66,6 +67,8 @@ message JournalEntry { optional UpdateInodeEntry update_inode = 35; optional UpdateInodeDirectoryEntry update_inode_directory = 36; optional UpdateInodeFileEntry update_inode_file = 37; + optional LoadJobEntry load_job = 53; + optional CopyJobEntry copy_job = 54; // This journal entry is a list of other entries. when a journal entry // contains other journal entries, all other optional fields must be unset. diff --git a/dev/intellij/runConfigurations/AlluxioFuse.xml b/dev/intellij/runConfigurations/AlluxioFuse.xml index 8932ba8179ff..5c0325a6b1fe 100644 --- a/dev/intellij/runConfigurations/AlluxioFuse.xml +++ b/dev/intellij/runConfigurations/AlluxioFuse.xml @@ -15,7 +15,7 @@